zc
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using Verse;
|
||||
|
||||
namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
@@ -11,7 +12,8 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
public abstract string Description { get; }
|
||||
public abstract string UsageSchema { get; } // XML schema description
|
||||
|
||||
public abstract string Execute(string args);
|
||||
public virtual string Execute(string args) => "Error: Synchronous execution not supported for this tool.";
|
||||
public virtual Task<string> ExecuteAsync(string args) => Task.FromResult(Execute(args));
|
||||
|
||||
/// <summary>
|
||||
/// Helper method to parse XML arguments into a dictionary.
|
||||
|
||||
@@ -11,27 +11,18 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
public override string Name => "analyze_screen";
|
||||
|
||||
public override string Description =>
|
||||
"分析当前游戏屏幕截图,了解玩家正在查看什么区域或内容。需要配置 VLM API 密钥。";
|
||||
"分析当前游戏屏幕截图。你可以提供具体的指令(instruction)告诉视觉模型你需要观察什么、寻找什么、或者如何描述屏幕。";
|
||||
|
||||
public override string UsageSchema =>
|
||||
"<analyze_screen><context>分析目标,如:玩家在看什么区域</context></analyze_screen>";
|
||||
"<analyze_screen><instruction>给视觉模型的具体指令。例如:'找到科研按钮的比例坐标' 或 '描述当前角色的健康状态栏内容'</instruction></analyze_screen>";
|
||||
|
||||
private const string VisionSystemPrompt = @"
|
||||
你是一个 RimWorld 游戏屏幕分析助手。分析截图并用简洁中文描述:
|
||||
- 玩家正在查看的区域(如:殖民地基地、世界地图、菜单界面)
|
||||
- 可见的重要建筑、角色、资源
|
||||
- 任何明显的问题或特殊状态
|
||||
保持回答简洁,不超过100字。不要使用 XML 标签。";
|
||||
private const string BaseVisionSystemPrompt = "你是一个专业的老练 RimWorld 助手。你会根据指示分析屏幕截图。保持回答专业且简洁。不要输出 XML 标签,除非被明确要求。";
|
||||
|
||||
public override string Execute(string args)
|
||||
public override async Task<string> ExecuteAsync(string args)
|
||||
{
|
||||
// 由于 VLM API 调用是异步的,我们需要同步等待结果
|
||||
// 这在 Unity 主线程上可能会阻塞,但工具执行通常在异步上下文中调用
|
||||
try
|
||||
{
|
||||
var task = ExecuteInternalAsync(args);
|
||||
// 使用 GetAwaiter().GetResult() 来同步等待,避免死锁
|
||||
return task.GetAwaiter().GetResult();
|
||||
return await ExecuteInternalAsync(args);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
@@ -43,7 +34,9 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
private async Task<string> ExecuteInternalAsync(string xmlContent)
|
||||
{
|
||||
var argsDict = ParseXmlArgs(xmlContent);
|
||||
string context = argsDict.TryGetValue("context", out var ctx) ? ctx : "描述当前屏幕内容";
|
||||
// 优先使用 instruction,兼容旧的 context 参数
|
||||
string instruction = argsDict.TryGetValue("instruction", out var inst) ? inst :
|
||||
(argsDict.TryGetValue("context", out var ctx) ? ctx : "描述当前屏幕内容,重点关注 UI 状态和重要实体。");
|
||||
|
||||
try
|
||||
{
|
||||
@@ -75,11 +68,11 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
var client = new SimpleAIClient(vlmApiKey, vlmBaseUrl, vlmModel);
|
||||
|
||||
string result = await client.GetVisionCompletionAsync(
|
||||
VisionSystemPrompt,
|
||||
context,
|
||||
BaseVisionSystemPrompt,
|
||||
instruction,
|
||||
base64Image,
|
||||
maxTokens: 256,
|
||||
temperature: 0.3f
|
||||
maxTokens: 512, // 增加 token 数以支持更复杂的分析指令响应
|
||||
temperature: 0.2f
|
||||
);
|
||||
|
||||
if (string.IsNullOrEmpty(result))
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
using UnityEngine;
|
||||
using WulaFallenEmpire.EventSystem.AI.Agent;
|
||||
|
||||
namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
{
|
||||
@@ -14,12 +15,12 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
|
||||
public override string Description =>
|
||||
"在指定的屏幕位置执行鼠标点击。坐标使用比例值 (0-1),(0,0) 是左上角,(1,1) 是右下角。" +
|
||||
"适用于点击无法通过 API 操作的 mod 按钮或 UI 元素。先使用 analyze_screen 获取目标位置。";
|
||||
"适用于点击无法通过 API 操作的 mod 按钮或 UI 元素。先使用 analyze_screen 获取目标位置分析。";
|
||||
|
||||
public override string UsageSchema =>
|
||||
"<visual_click><x>0-1之间的X比例</x><y>0-1之间的Y比例</y><right_click>可选,true为右键</right_click></visual_click>";
|
||||
|
||||
public override string Execute(string args)
|
||||
public override Task<string> ExecuteAsync(string args)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -28,19 +29,19 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
// 解析 X 坐标
|
||||
if (!argsDict.TryGetValue("x", out string xStr) || !float.TryParse(xStr, out float x))
|
||||
{
|
||||
return "Error: 缺少有效的 x 坐标 (0-1之间的比例值)";
|
||||
return Task.FromResult("Error: 缺少有效的 x 坐标 (0-1之间的比例值)");
|
||||
}
|
||||
|
||||
// 解析 Y 坐标
|
||||
if (!argsDict.TryGetValue("y", out string yStr) || !float.TryParse(yStr, out float y))
|
||||
{
|
||||
return "Error: 缺少有效的 y 坐标 (0-1之间的比例值)";
|
||||
return Task.FromResult("Error: 缺少有效的 y 坐标 (0-1之间的比例值)");
|
||||
}
|
||||
|
||||
// 验证范围
|
||||
if (x < 0 || x > 1 || y < 0 || y > 1)
|
||||
{
|
||||
return $"Error: 坐标 ({x}, {y}) 超出范围,必须在 0-1 之间";
|
||||
return Task.FromResult($"Error: 坐标 ({x}, {y}) 超出范围,必须在 0-1 之间");
|
||||
}
|
||||
|
||||
// 解析右键选项
|
||||
@@ -60,17 +61,17 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
int screenY = Mathf.RoundToInt(y * Screen.height);
|
||||
|
||||
WulaLog.Debug($"[Tool_VisualClick] {clickType}点击 ({x:F3}, {y:F3}) -> 屏幕 ({screenX}, {screenY})");
|
||||
return $"Success: 已在屏幕位置 ({screenX}, {screenY}) 执行{clickType}点击";
|
||||
return Task.FromResult($"Success: 已在屏幕位置 ({screenX}, {screenY}) 执行{clickType}点击");
|
||||
}
|
||||
else
|
||||
{
|
||||
return "Error: 点击操作失败";
|
||||
return Task.FromResult("Error: 点击操作失败");
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
WulaLog.Debug($"[Tool_VisualClick] Error: {ex}");
|
||||
return $"Error: 点击操作失败 - {ex.Message}";
|
||||
return Task.FromResult($"Error: 点击操作失败 - {ex.Message}");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -88,7 +89,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
public override string UsageSchema =>
|
||||
"<visual_type_text><text>要输入的文本</text></visual_type_text>";
|
||||
|
||||
public override string Execute(string args)
|
||||
public override Task<string> ExecuteAsync(string args)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -96,23 +97,23 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
|
||||
if (!argsDict.TryGetValue("text", out string text) || string.IsNullOrEmpty(text))
|
||||
{
|
||||
return "Error: 缺少要输入的文本";
|
||||
return Task.FromResult("Error: 缺少要输入的文本");
|
||||
}
|
||||
|
||||
// 使用剪贴板方式输入(支持中文)
|
||||
GUIUtility.systemCopyBuffer = text;
|
||||
// 获取当前鼠标位置
|
||||
var pos = MouseSimulator.GetCurrentPosition();
|
||||
|
||||
// 模拟 Ctrl+V 粘贴
|
||||
// 注意:这需要额外的键盘模拟实现
|
||||
// 暂时返回成功,实际使用时需要完善
|
||||
float propX = Mathf.Clamp01((float)pos.x / Screen.width);
|
||||
float propY = Mathf.Clamp01((float)pos.y / Screen.height);
|
||||
|
||||
WulaLog.Debug($"[Tool_VisualTypeText] 已将文本复制到剪贴板: {text}");
|
||||
return $"Success: 已将文本复制到剪贴板。请手动按 Ctrl+V 粘贴,或等待键盘模拟功能完善。";
|
||||
WulaLog.Debug($"[VisualTypeText] Current Pos: ({pos.x}, {pos.y}) -> Proportional: ({propX:F3}, {propY:F3})");
|
||||
|
||||
return Task.FromResult(VisualInteractionTools.TypeText(propX, propY, text));
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
WulaLog.Debug($"[Tool_VisualTypeText] Error: {ex}");
|
||||
return $"Error: 输入文本失败 - {ex.Message}";
|
||||
return Task.FromResult($"Error: 输入文本失败 - {ex.Message}");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -130,7 +131,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
public override string UsageSchema =>
|
||||
"<visual_scroll><delta>滚动量,正数向上负数向下</delta><x>可选,0-1 X坐标</x><y>可选,0-1 Y坐标</y></visual_scroll>";
|
||||
|
||||
public override string Execute(string args)
|
||||
public override Task<string> ExecuteAsync(string args)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -138,7 +139,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
|
||||
if (!argsDict.TryGetValue("delta", out string deltaStr) || !int.TryParse(deltaStr, out int delta))
|
||||
{
|
||||
return "Error: 缺少有效的 delta 值";
|
||||
return Task.FromResult("Error: 缺少有效的 delta 值");
|
||||
}
|
||||
|
||||
// 可选:先移动到指定位置
|
||||
@@ -154,12 +155,12 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
Agent.MouseSimulator.Scroll(delta);
|
||||
|
||||
string direction = delta > 0 ? "向上" : "向下";
|
||||
return $"Success: 已{direction}滚动 {Math.Abs(delta)} 单位";
|
||||
return Task.FromResult($"Success: 已{direction}滚动 {Math.Abs(delta)} 单位");
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
WulaLog.Debug($"[Tool_VisualScroll] Error: {ex}");
|
||||
return $"Error: 滚动操作失败 - {ex.Message}";
|
||||
return Task.FromResult($"Error: 滚动操作失败 - {ex.Message}");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
130
Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_VisualUtils.cs
Normal file
130
Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_VisualUtils.cs
Normal file
@@ -0,0 +1,130 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Threading.Tasks;
|
||||
using UnityEngine;
|
||||
using WulaFallenEmpire.EventSystem.AI.Agent;
|
||||
|
||||
namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
{
|
||||
/// <summary>
/// Common base class for the visual interaction tools declared in this file.
/// Provides a helper for reading float arguments and requires every subclass
/// to supply its own <see cref="ExecuteAsync"/> implementation.
/// </summary>
public abstract class VisualToolBase : AITool
{
    /// <summary>
    /// Looks up <paramref name="key"/> in <paramref name="dict"/> and parses its value as a float.
    /// </summary>
    /// <param name="dict">Parsed tool arguments, keyed by argument name.</param>
    /// <param name="key">Name of the argument to read.</param>
    /// <param name="result">The parsed value on success; 0 on failure.</param>
    /// <returns>
    /// <c>true</c> when the key is present and its value is a finite number;
    /// <c>false</c> otherwise.
    /// </returns>
    protected bool GetFloat(Dictionary<string, string> dict, string key, out float result)
    {
        result = 0f;

        if (dict == null || key == null || !dict.TryGetValue(key, out string raw))
        {
            return false;
        }

        // Arguments are machine-written ("0.35"), so parse them with the invariant
        // culture. The culture-sensitive overload rejects or misreads '.' decimals
        // on systems whose locale uses ',' as the decimal separator.
        // The style flags match the defaults of float.TryParse(string, out float).
        const System.Globalization.NumberStyles styles =
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

        if (!float.TryParse(raw, styles, System.Globalization.CultureInfo.InvariantCulture, out float parsed))
        {
            return false;
        }

        // "NaN" and "Infinity" parse successfully but are never valid coordinates or
        // durations, and NaN slips through comparisons such as (x < 0 || x > 1).
        if (float.IsNaN(parsed) || float.IsInfinity(parsed))
        {
            return false;
        }

        result = parsed;
        return true;
    }

    /// <summary>
    /// Runs the tool with the given raw argument text and returns its result message.
    /// </summary>
    public abstract override Task<string> ExecuteAsync(string args);
}
|
||||
|
||||
/// <summary>
/// Visual drag tool: drags the mouse from a start point to an end point, both
/// given as proportional screen coordinates.
/// </summary>
public class Tool_VisualDrag : VisualToolBase
{
    public override string Name => "visual_drag";

    public override string Description => "从起始坐标拖拽到结束坐标。适用于框选单位、拖动滑块或地图。";

    public override string UsageSchema => "<visual_drag><start_x>0-1</start_x><start_y>0-1</start_y><end_x>0-1</end_x><end_y>0-1</end_y><duration>秒(默认0.5)</duration></visual_drag>";

    /// <summary>
    /// Parses the drag arguments and forwards them to
    /// <c>VisualInteractionTools.MouseDrag</c>.
    /// </summary>
    /// <param name="args">Argument text in the shape described by <see cref="UsageSchema"/>.</param>
    /// <returns>
    /// A completed task holding the drag result, or an "Error: ..." message when an
    /// argument is missing, malformed or out of range, or when an exception is thrown.
    /// </returns>
    public override Task<string> ExecuteAsync(string args)
    {
        try
        {
            var dict = ParseXmlArgs(args);

            if (!GetFloat(dict, "start_x", out float startX) || !GetFloat(dict, "start_y", out float startY) ||
                !GetFloat(dict, "end_x", out float endX) || !GetFloat(dict, "end_y", out float endY))
            {
                return Task.FromResult("Error: 缺少有效的坐标参数 (0-1)");
            }

            // Same range rule that visual_click enforces: coordinates are proportions of the screen.
            if (!IsProportion(startX) || !IsProportion(startY) || !IsProportion(endX) || !IsProportion(endY))
            {
                return Task.FromResult($"Error: 坐标 ({startX}, {startY}) -> ({endX}, {endY}) 超出范围,必须在 0-1 之间");
            }

            // Duration is optional; a missing, negative or non-finite value keeps the default.
            float duration = 0.5f;
            if (GetFloat(dict, "duration", out float requested) && requested >= 0f && !float.IsInfinity(requested))
            {
                duration = requested;
            }

            return Task.FromResult(VisualInteractionTools.MouseDrag(startX, startY, endX, endY, duration));
        }
        catch (Exception error)
        {
            return Task.FromResult($"Error: {error.Message}");
        }
    }

    /// <summary>True when <paramref name="value"/> lies in [0, 1]; false for NaN.</summary>
    private static bool IsProportion(float value) => value >= 0f && value <= 1f;
}
|
||||
|
||||
/// <summary>
/// Visual hotkey tool (general purpose): presses a key or key combination,
/// either at a caller-supplied position or at the current mouse position.
/// </summary>
public class Tool_VisualHotkey : VisualToolBase
{
    public override string Name => "visual_hotkey";

    public override string Description => "在指定位置点击(可选)并按下快捷键。支持组合键如 'ctrl+c', 'alt+f4', 单键如 'enter', 'esc', 'r', 'space'。";

    public override string UsageSchema => "<visual_hotkey><key>快捷键</key><x>可选</x><y>可选</y></visual_hotkey>";

    /// <summary>
    /// Parses the hotkey arguments and forwards them to
    /// <c>VisualInteractionTools.PressHotkey</c>.
    /// </summary>
    /// <param name="args">Argument text in the shape described by <see cref="UsageSchema"/>.</param>
    /// <returns>
    /// A completed task holding the hotkey result, or an "Error: ..." message when
    /// the key is missing or an exception is thrown.
    /// </returns>
    public override Task<string> ExecuteAsync(string args)
    {
        try
        {
            var dict = ParseXmlArgs(args);

            // Single lookup instead of ContainsKey + indexer; a blank key is treated as missing.
            if (!dict.TryGetValue("key", out string key) || string.IsNullOrWhiteSpace(key))
            {
                return Task.FromResult("Error: 缺少 key 参数");
            }

            // When both coordinates are supplied, the hotkey is sent at that position.
            if (GetFloat(dict, "x", out float x) && GetFloat(dict, "y", out float y))
            {
                return Task.FromResult(VisualInteractionTools.PressHotkey(x, y, key));
            }

            // Otherwise press the key at the current mouse position, converted to proportions.
            // NOTE(review): Y is inverted here (1 - y/height), while visual_type_text converts
            // the same GetCurrentPosition() result without inversion. Confirm which origin
            // GetCurrentPosition uses and make the two tools agree.
            var pos = MouseSimulator.GetCurrentPosition();
            float propX = Mathf.Clamp01((float)pos.x / Screen.width);
            float propY = Mathf.Clamp01(1.0f - ((float)pos.y / Screen.height));
            return Task.FromResult(VisualInteractionTools.PressHotkey(propX, propY, key));
        }
        catch (Exception error)
        {
            return Task.FromResult($"Error: {error.Message}");
        }
    }
}
|
||||
|
||||
/// <summary>
/// Visual wait tool: pauses for a requested number of seconds.
/// </summary>
public class Tool_VisualWait : VisualToolBase
{
    public override string Name => "visual_wait";

    public override string Description => "等待指定时间。用于等待UI动画或加载。";

    public override string UsageSchema => "<visual_wait><seconds>秒数</seconds></visual_wait>";

    /// <summary>
    /// Parses the <c>seconds</c> argument and forwards it to
    /// <c>VisualInteractionTools.Wait</c>.
    /// </summary>
    /// <param name="args">Argument text in the shape described by <see cref="UsageSchema"/>.</param>
    /// <returns>
    /// A completed task holding the wait result, or an "Error: ..." message when
    /// <c>seconds</c> is missing, malformed, negative or non-finite, or when an
    /// exception is thrown.
    /// </returns>
    public override Task<string> ExecuteAsync(string args)
    {
        try
        {
            var dict = ParseXmlArgs(args);

            if (!GetFloat(dict, "seconds", out float seconds))
            {
                return Task.FromResult("Error: 缺少 seconds 参数");
            }

            // A negative, NaN or infinite wait is never meaningful; reject it here rather
            // than relying on how the wait helper treats such values.
            if (!(seconds >= 0f) || float.IsInfinity(seconds))
            {
                return Task.FromResult("Error: seconds 必须是非负的有限数值");
            }

            return Task.FromResult(VisualInteractionTools.Wait(seconds));
        }
        catch (Exception error)
        {
            return Task.FromResult($"Error: {error.Message}");
        }
    }
}
|
||||
|
||||
/// <summary>
/// Visual delete-text tool: targets a screen position and removes a number of
/// characters there (Backspace, per the tool description).
/// </summary>
public class Tool_VisualDeleteText : VisualToolBase
{
    public override string Name => "visual_delete_text";

    public override string Description => "点击指定位置并按 Backspace 删除指定数量的字符。用于清空输入框。";

    public override string UsageSchema => "<visual_delete_text><x>0-1</x><y>0-1</y><count>字符数(默认1)</count></visual_delete_text>";

    /// <summary>
    /// Parses the position and character count and forwards them to
    /// <c>VisualInteractionTools.DeleteText</c>.
    /// </summary>
    /// <param name="args">Argument text in the shape described by <see cref="UsageSchema"/>.</param>
    /// <returns>
    /// A completed task holding the delete result, or an "Error: ..." message when a
    /// coordinate is missing, malformed or out of range, when <c>count</c> is not
    /// positive, or when an exception is thrown.
    /// </returns>
    public override Task<string> ExecuteAsync(string args)
    {
        try
        {
            var dict = ParseXmlArgs(args);

            if (!GetFloat(dict, "x", out float x) || !GetFloat(dict, "y", out float y))
            {
                return Task.FromResult("Error: 缺少有效的坐标参数");
            }

            // Same range rule that visual_click enforces; the negated form also rejects NaN.
            if (!(x >= 0f && x <= 1f) || !(y >= 0f && y <= 1f))
            {
                return Task.FromResult($"Error: 坐标 ({x}, {y}) 超出范围,必须在 0-1 之间");
            }

            // count is optional: a missing or unparsable value keeps the default of 1,
            // but an explicit zero or negative count is reported instead of passed on.
            int count = 1;
            if (dict.TryGetValue("count", out string countText) && int.TryParse(countText, out int requested))
            {
                if (requested < 1)
                {
                    return Task.FromResult("Error: count 必须是正整数");
                }

                count = requested;
            }

            return Task.FromResult(VisualInteractionTools.DeleteText(x, y, count));
        }
        catch (Exception error)
        {
            return Task.FromResult($"Error: {error.Message}");
        }
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user