zc
This commit is contained in:
@@ -320,6 +320,12 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
||||
_tools.Add(new Tool_CallBombardment());
|
||||
_tools.Add(new Tool_SearchThingDef());
|
||||
_tools.Add(new Tool_SearchPawnKind());
|
||||
|
||||
// VLM 视觉分析工具 (条件性启用)
|
||||
if (WulaFallenEmpireMod.settings?.enableVlmFeatures == true)
|
||||
{
|
||||
_tools.Add(new Tool_AnalyzeScreen());
|
||||
}
|
||||
}
|
||||
|
||||
private void SetThinkingState(bool isThinking)
|
||||
|
||||
@@ -0,0 +1,87 @@
|
||||
using System;
|
||||
using UnityEngine;
|
||||
|
||||
namespace WulaFallenEmpire.EventSystem.AI
|
||||
{
|
||||
/// <summary>
/// Unity screen-capture helper that produces screenshots for VLM vision analysis.
/// </summary>
public static class ScreenCaptureUtility
{
    // Longest allowed edge (in pixels) of the encoded image; larger captures are
    // scaled down to keep the request payload, and therefore API cost, small.
    private const int MaxImageSize = 1024;

    /// <summary>
    /// Captures the current screen and returns it as a Base64-encoded PNG.
    /// </summary>
    /// <returns>
    /// The Base64 string of the PNG bytes, or <c>null</c> when the capture returned
    /// no texture or any step threw an exception (the failure is logged).
    /// </returns>
    public static string CaptureScreenAsBase64()
    {
        Texture2D screenshot = null;
        Texture2D resized = null;

        try
        {
            // NOTE(review): Unity documents this call as reliable only after the frame
            // has finished rendering — confirm callers invoke it at a suitable time.
            screenshot = ScreenCapture.CaptureScreenshotAsTexture();
            if (screenshot == null)
            {
                WulaLog.Debug("[ScreenCapture] CaptureScreenshotAsTexture returned null");
                return null;
            }

            // Scale down to respect the size limit; returns the same instance when
            // the capture is already small enough.
            resized = ResizeTexture(screenshot, MaxImageSize);

            byte[] pngBytes = resized.EncodeToPNG();

            WulaLog.Debug($"[ScreenCapture] Captured {pngBytes.Length} bytes");
            return Convert.ToBase64String(pngBytes);
        }
        catch (Exception ex)
        {
            WulaLog.Debug($"[ScreenCapture] Failed: {ex.Message}");
            return null;
        }
        finally
        {
            // Release the textures on every path, including failures, so a throwing
            // encode/resize does not leak native texture memory.
            if (resized != null && resized != screenshot)
            {
                UnityEngine.Object.Destroy(resized);
            }

            if (screenshot != null)
            {
                UnityEngine.Object.Destroy(screenshot);
            }
        }
    }

    /// <summary>
    /// Scales a texture so that its longest edge does not exceed <paramref name="maxSize"/>,
    /// preserving the aspect ratio.
    /// </summary>
    /// <param name="source">Texture to scale; it is never modified or destroyed here.</param>
    /// <param name="maxSize">Maximum length, in pixels, of the longest edge.</param>
    /// <returns>
    /// <paramref name="source"/> itself when it already fits, otherwise a newly created
    /// RGB24 texture that the caller owns and must destroy.
    /// </returns>
    private static Texture2D ResizeTexture(Texture2D source, int maxSize)
    {
        int width = source.width;
        int height = source.height;

        // Already within bounds: nothing to do.
        if (width <= maxSize && height <= maxSize)
        {
            return source;
        }

        float ratio = (float)maxSize / Mathf.Max(width, height);

        // Clamp to at least one pixel: a very elongated capture could otherwise
        // round its short edge down to 0, which is not a valid texture size.
        int newWidth = Mathf.Max(1, Mathf.RoundToInt(width * ratio));
        int newHeight = Mathf.Max(1, Mathf.RoundToInt(height * ratio));

        // Graphics.Blit changes RenderTexture.active, so the previous target has to be
        // remembered BEFORE blitting; otherwise the "restore" below would re-select the
        // temporary texture that is about to be released.
        RenderTexture previous = RenderTexture.active;
        RenderTexture rt = RenderTexture.GetTemporary(newWidth, newHeight);
        Texture2D resized = null;

        try
        {
            // The blit performs the actual scaling on the GPU.
            Graphics.Blit(source, rt);
            RenderTexture.active = rt;

            // Read the scaled pixels back into a CPU-side texture.
            resized = new Texture2D(newWidth, newHeight, TextureFormat.RGB24, false);
            resized.ReadPixels(new Rect(0, 0, newWidth, newHeight), 0, 0);
            resized.Apply();
        }
        catch
        {
            // Do not leak the half-built texture if the read-back fails.
            if (resized != null)
            {
                UnityEngine.Object.Destroy(resized);
            }

            throw;
        }
        finally
        {
            RenderTexture.active = previous;
            RenderTexture.ReleaseTemporary(rt);
        }

        WulaLog.Debug($"[ScreenCapture] Resized from {width}x{height} to {newWidth}x{newHeight}");
        return resized;
    }
}
|
||||
}
|
||||
@@ -346,5 +346,91 @@ namespace WulaFallenEmpire.EventSystem.AI
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/// <summary>
/// Sends a vision (text + image) chat-completion request to the configured endpoint.
/// </summary>
/// <param name="systemPrompt">Optional system message; left out of the request when null or empty.</param>
/// <param name="userText">Text part of the user message.</param>
/// <param name="base64Image">Base64-encoded PNG, embedded in the request as a data URL.</param>
/// <param name="maxTokens">Token limit for the reply; values below 1 are raised to 1.</param>
/// <param name="temperature">Sampling temperature; clamped to the range 0..2.</param>
/// <returns>
/// The result of <c>ExtractContent</c> applied to the response body, or <c>null</c> when the
/// base URL or image is missing or the request did not succeed.
/// </returns>
public async Task<string> GetVisionCompletionAsync(
    string systemPrompt,
    string userText,
    string base64Image,
    int maxTokens = 512,
    float temperature = 0.3f)
{
    if (string.IsNullOrEmpty(_baseUrl))
    {
        WulaLog.Debug("[WulaAI] VLM: Base URL is missing.");
        return null;
    }

    // A vision request without an image is pointless and would only cost a round trip.
    if (string.IsNullOrEmpty(base64Image))
    {
        WulaLog.Debug("[WulaAI] VLM: Image data is missing.");
        return null;
    }

    // Normalise a trailing slash first so "…/v1/" is recognised as "…/v1" and the
    // resulting endpoint never contains "//". Ordinal comparison: URLs are not
    // linguistic text and must not depend on the current culture.
    string baseUrl = _baseUrl.TrimEnd('/');
    string endpoint;
    if (baseUrl.EndsWith("/chat/completions", System.StringComparison.Ordinal))
    {
        endpoint = baseUrl;
    }
    else if (baseUrl.EndsWith("/v1", System.StringComparison.Ordinal))
    {
        endpoint = $"{baseUrl}/chat/completions";
    }
    else
    {
        endpoint = $"{baseUrl}/v1/chat/completions";
    }

    // Build the request JSON by hand; the user message uses the multimodal
    // "content" array form (one text part, one image_url part).
    StringBuilder jsonBuilder = new StringBuilder(base64Image.Length + 512);
    jsonBuilder.Append("{");
    // The model name is escaped like every other string field so that a quote or
    // backslash in the setting cannot produce malformed JSON.
    jsonBuilder.Append($"\"model\": \"{EscapeJson(_model ?? string.Empty)}\",");
    jsonBuilder.Append("\"stream\": false,");
    jsonBuilder.Append($"\"max_tokens\": {Math.Max(1, maxTokens)},");
    // Invariant culture keeps the decimal separator a '.', as JSON requires.
    jsonBuilder.Append($"\"temperature\": {Mathf.Clamp(temperature, 0f, 2f).ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)},");
    jsonBuilder.Append("\"messages\": [");

    // System message (optional).
    if (!string.IsNullOrEmpty(systemPrompt))
    {
        jsonBuilder.Append($"{{\"role\": \"system\", \"content\": \"{EscapeJson(systemPrompt)}\"}},");
    }

    // User message: text part followed by the image as a data URL.
    jsonBuilder.Append("{\"role\": \"user\", \"content\": [");
    jsonBuilder.Append($"{{\"type\": \"text\", \"text\": \"{EscapeJson(userText ?? string.Empty)}\"}},");
    jsonBuilder.Append("{\"type\": \"image_url\", \"image_url\": {");
    jsonBuilder.Append($"\"url\": \"data:image/png;base64,{base64Image}\"");
    jsonBuilder.Append("}}");
    jsonBuilder.Append("]}");

    jsonBuilder.Append("]}");

    string jsonBody = jsonBuilder.ToString();
    if (Prefs.DevMode)
    {
        // Log only the image length, never the Base64 payload itself.
        WulaLog.Debug($"[WulaAI] VLM request to {endpoint} (model={_model}, imageSize={base64Image?.Length ?? 0} chars)");
    }

    using (UnityWebRequest request = new UnityWebRequest(endpoint, "POST"))
    {
        byte[] bodyRaw = Encoding.UTF8.GetBytes(jsonBody);
        request.uploadHandler = new UploadHandlerRaw(bodyRaw);
        request.downloadHandler = new DownloadHandlerBuffer();
        request.SetRequestHeader("Content-Type", "application/json");
        request.timeout = 60; // Vision requests can take longer because of image processing.
        if (!string.IsNullOrEmpty(_apiKey))
        {
            request.SetRequestHeader("Authorization", $"Bearer {_apiKey}");
        }

        var operation = request.SendWebRequest();

        // Poll for completion without blocking the calling thread.
        while (!operation.isDone)
        {
            await Task.Delay(100);
        }

        // Treat every non-success outcome as a failure, including data-processing
        // errors, not just connection and protocol errors.
        if (request.result != UnityWebRequest.Result.Success)
        {
            WulaLog.Debug($"[WulaAI] VLM API Error: {request.error}");
            return null;
        }

        string responseText = request.downloadHandler.text;
        if (Prefs.DevMode)
        {
            WulaLog.Debug($"[WulaAI] VLM Response (truncated): {TruncateForLog(responseText)}");
        }

        return ExtractContent(responseText);
    }
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
using System;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace WulaFallenEmpire.EventSystem.AI.Tools
|
||||
{
|
||||
/// <summary>
/// VLM vision-analysis tool: captures the game screen and asks a vision-language
/// model to describe it.
/// </summary>
public class Tool_AnalyzeScreen : AITool
{
    public override string Name => "analyze_screen";

    public override string Description =>
        "分析当前游戏屏幕截图,了解玩家正在查看什么区域或内容。需要配置 VLM API 密钥。";

    public override string UsageSchema =>
        "<analyze_screen><context>分析目标,如:玩家在看什么区域</context></analyze_screen>";

    // System prompt sent with every vision request (kept verbatim; it is runtime text).
    private const string VisionSystemPrompt = @"
你是一个 RimWorld 游戏屏幕分析助手。分析截图并用简洁中文描述:
- 玩家正在查看的区域(如:殖民地基地、世界地图、菜单界面)
- 可见的重要建筑、角色、资源
- 任何明显的问题或特殊状态
保持回答简洁,不超过100字。不要使用 XML 标签。";

    // Prompt text used when the caller supplies no usable <context>.
    private const string DefaultContext = "描述当前屏幕内容";

    /// <summary>
    /// Runs the screen analysis synchronously and returns a user-facing result string.
    /// </summary>
    /// <param name="args">Tool arguments in the XML form described by <see cref="UsageSchema"/>.</param>
    /// <returns>The analysis text, or a user-facing error message; never throws.</returns>
    public override string Execute(string args)
    {
        // The VLM call is asynchronous but this entry point is synchronous, so the
        // task is waited on here.
        // NOTE(review): GetAwaiter().GetResult() blocks the calling thread and does not
        // by itself prevent deadlocks. If this runs on a thread whose synchronization
        // context must resume the awaited work, the wait may never finish — confirm
        // which thread invokes Execute.
        try
        {
            var task = ExecuteInternalAsync(args);
            return task.GetAwaiter().GetResult();
        }
        catch (Exception ex)
        {
            WulaLog.Debug($"[Tool_AnalyzeScreen] Execute error: {ex}");
            return $"视觉分析出错: {ex.Message}";
        }
    }

    /// <summary>
    /// Resolves VLM settings, captures the screen and sends it to the vision endpoint.
    /// </summary>
    /// <param name="xmlContent">Raw tool arguments; only the <c>context</c> value is used.</param>
    /// <returns>The prefixed analysis result, or a user-facing error message.</returns>
    private async Task<string> ExecuteInternalAsync(string xmlContent)
    {
        var argsDict = ParseXmlArgs(xmlContent);

        // Fall back to the default prompt when <context> is absent OR blank; an empty
        // tag would otherwise send an empty question to the model.
        string context = DefaultContext;
        if (argsDict != null
            && argsDict.TryGetValue("context", out var ctx)
            && !string.IsNullOrWhiteSpace(ctx))
        {
            context = ctx;
        }

        try
        {
            // Validate the VLM configuration.
            var settings = WulaFallenEmpireMod.settings;
            if (settings == null)
            {
                return "Mod 设置未初始化。";
            }

            // Use the dedicated VLM values when set; otherwise fall back to the main
            // API key and the built-in default endpoint/model.
            // NOTE(review): with no VLM key configured, the main API key is sent to the
            // VLM base URL, which defaults to DashScope — confirm this is intended for
            // users whose main key belongs to a different provider.
            string vlmApiKey = !string.IsNullOrEmpty(settings.vlmApiKey) ? settings.vlmApiKey : settings.apiKey;
            string vlmBaseUrl = !string.IsNullOrEmpty(settings.vlmBaseUrl) ? settings.vlmBaseUrl : "https://dashscope.aliyuncs.com/compatible-mode/v1";
            string vlmModel = !string.IsNullOrEmpty(settings.vlmModel) ? settings.vlmModel : "qwen-vl-plus";

            if (string.IsNullOrEmpty(vlmApiKey))
            {
                return "VLM API 密钥未配置。请在 Mod 设置中配置 API 密钥。";
            }

            // Capture the screen.
            string base64Image = ScreenCaptureUtility.CaptureScreenAsBase64();
            if (string.IsNullOrEmpty(base64Image))
            {
                return "截屏失败,无法分析屏幕。";
            }

            // Call the VLM API.
            var client = new SimpleAIClient(vlmApiKey, vlmBaseUrl, vlmModel);

            string result = await client.GetVisionCompletionAsync(
                VisionSystemPrompt,
                context,
                base64Image,
                maxTokens: 256,
                temperature: 0.3f
            );

            if (string.IsNullOrEmpty(result))
            {
                return "VLM 分析无响应,请检查 API 配置。";
            }

            return $"屏幕分析结果: {result.Trim()}";
        }
        catch (Exception ex)
        {
            WulaLog.Debug($"[Tool_AnalyzeScreen] Error: {ex}");
            return $"视觉分析出错: {ex.Message}";
        }
    }
}
|
||||
}
|
||||
Reference in New Issue
Block a user