diff --git a/1.6/1.6/Assemblies/WulaFallenEmpire.dll b/1.6/1.6/Assemblies/WulaFallenEmpire.dll index 15c1b131..23b3ffe5 100644 Binary files a/1.6/1.6/Assemblies/WulaFallenEmpire.dll and b/1.6/1.6/Assemblies/WulaFallenEmpire.dll differ diff --git a/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs b/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs index 1122c30c..d4b4d392 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs @@ -69,6 +69,7 @@ namespace WulaFallenEmpire.EventSystem.AI public bool AnyToolSuccess; public bool AnyActionSuccess; public bool AnyActionError; + public string CapturedImage; } private const string DefaultPersona = @" @@ -471,7 +472,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori $"You will produce the natural-language reply later and MUST use: {language}."; } - private string GetToolSystemInstruction(RequestPhase phase) + private string GetToolSystemInstruction(RequestPhase phase, bool hasImage) { string phaseInstruction = GetPhaseInstruction(phase).TrimEnd(); string toolsForThisPhase = BuildToolsForPhase(phase); @@ -485,7 +486,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "Query tools exist but are disabled in this phase (not listed here).\n" : string.Empty; - if (WulaFallenEmpireMod.settings?.enableVlmFeatures == true && WulaFallenEmpireMod.settings?.useNativeMultimodal == true) + if (hasImage && WulaFallenEmpireMod.settings?.enableVlmFeatures == true) { phaseInstruction += "\n- NATIVE MULTIMODAL: A current screenshot of the game is attached to this request. You can see the game state directly. Use it to determine coordinates for visual tools or to understand the context."; } @@ -833,6 +834,16 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori OnMessageReceived?.Invoke(cleanedResponse); } } + + private bool CheckVisualIntent(string message) + { + if (string.IsNullOrEmpty(message)) return false; + string[] keywords = new string[] { + "屏幕", "画面", "截图", "看", "找", "显示", // CN + "screen", "screenshot", "image", "view", "look", "see", "find", "visual", "scan" // EN + }; + return keywords.Any(k => message.IndexOf(k, StringComparison.OrdinalIgnoreCase) >= 0); + } private async Task RunPhasedRequestAsync() { if (_isThinking) return; @@ -858,18 +869,13 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori var client = new SimpleAIClient(apiKey, baseUrl, model, settings.useGeminiProtocol); _currentClient = client; - // 只有当启用了 VLM 特性,且开启了原生多模态模式时,才截图并在请求中包含图片 + // Model-Driven Vision: Start with null image. The model must ask for it using the analyze_screen or capture_screen tool if needed. string base64Image = null; - if (settings.enableVlmFeatures && settings.useNativeMultimodal) - { - base64Image = ScreenCaptureUtility.CaptureScreenAsBase64(); - if (settings.showThinkingProcess) - { - AddAssistantMessage("[P.I.A] 正在扫描当前战区情况..."); - } - } - else if (settings.showThinkingProcess) + + // If VLM is enabled, we allow the tool use. + if (settings.enableVlmFeatures && settings.showThinkingProcess) { + // Optional: We can still say "Analyzing data link..." AddAssistantMessage("[P.I.A] 正在分析数据链路..."); } @@ -879,8 +885,8 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori WulaLog.Debug($"[WulaAI] ===== Turn 1/3 ({queryPhase}) ====="); } - string queryInstruction = GetToolSystemInstruction(queryPhase); - string queryResponse = await client.GetChatCompletionAsync(queryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f, base64Image: base64Image); + string queryInstruction = GetToolSystemInstruction(queryPhase, !string.IsNullOrEmpty(base64Image)); + string queryResponse = await client.GetChatCompletionAsync(queryInstruction, BuildToolContext(queryPhase), maxTokens: 2048, temperature: 0.1f, base64Image: base64Image); if (string.IsNullOrEmpty(queryResponse)) { AddAssistantMessage("Wula_AI_Error_ConnectionLost".Translate()); @@ -897,6 +903,16 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori } PhaseExecutionResult queryResult = await ExecuteXmlToolsForPhase(queryResponse, queryPhase); + + // DATA FLOW: If Query Phase captured an image, propagate it to subsequent phases. + if (!string.IsNullOrEmpty(queryResult.CapturedImage)) + { + base64Image = queryResult.CapturedImage; + if (settings.showThinkingProcess) + { + AddAssistantMessage("[P.I.A] 视觉传感器已激活,图像已捕获..."); + } + } if (!queryResult.AnyToolSuccess && !_queryRetryUsed) { @@ -912,7 +928,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "Output the XML tag only and NOTHING else.\n" + "\nLast user request:\n" + lastUserMessage; - string retryDecision = await client.GetChatCompletionAsync(retryInstruction, new List<(string role, string message)>(), maxTokens: 16, temperature: 0.1f); + string retryDecision = await client.GetChatCompletionAsync(retryInstruction, new List<(string role, string message)>(), maxTokens: 256, temperature: 0.1f); if (!string.IsNullOrEmpty(retryDecision) && ShouldRetryTools(retryDecision)) { if (Prefs.DevMode) @@ -921,9 +937,9 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori } SetThinkingPhase(1, true); - string retryQueryInstruction = GetToolSystemInstruction(queryPhase) + + string retryQueryInstruction = GetToolSystemInstruction(queryPhase, !string.IsNullOrEmpty(base64Image)) + "\n\n# RETRY\nYou chose to retry. Output XML tool calls only (or )."; - string retryQueryResponse = await client.GetChatCompletionAsync(retryQueryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f, base64Image: base64Image); + string retryQueryResponse = await client.GetChatCompletionAsync(retryQueryInstruction, BuildToolContext(queryPhase), maxTokens: 2048, temperature: 0.1f, base64Image: base64Image); if (string.IsNullOrEmpty(retryQueryResponse)) { AddAssistantMessage("Wula_AI_Error_ConnectionLost".Translate()); @@ -955,9 +971,10 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori } SetThinkingPhase(2, false); - string actionInstruction = GetToolSystemInstruction(actionPhase); + string actionInstruction = GetToolSystemInstruction(actionPhase, !string.IsNullOrEmpty(base64Image)); var actionContext = BuildToolContext(actionPhase, includeUser: true); - string actionResponse = await client.GetChatCompletionAsync(actionInstruction, actionContext, maxTokens: 128, temperature: 0.1f); + // Important: Pass base64Image to Action Phase as well if available, so visual_click works. + string actionResponse = await client.GetChatCompletionAsync(actionInstruction, actionContext, maxTokens: 2048, temperature: 0.1f, base64Image: base64Image); if (string.IsNullOrEmpty(actionResponse)) { AddAssistantMessage("Wula_AI_Error_ConnectionLost".Translate()); @@ -991,7 +1008,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "- String (e.g. 
'enter', 'esc', 'space')\n" + "- Float\n" + "\nPrevious output:\n" + TrimForPrompt(actionResponse, 600); - string fixedResponse = await client.GetChatCompletionAsync(fixInstruction, actionContext, maxTokens: 128, temperature: 0.1f); + string fixedResponse = await client.GetChatCompletionAsync(fixInstruction, actionContext, maxTokens: 2048, temperature: 0.1f); bool fixedHasXml = !string.IsNullOrEmpty(fixedResponse) && IsXmlToolCall(fixedResponse); bool fixedIsNoActionOnly = fixedHasXml && IsNoActionOnly(fixedResponse); bool fixedHasActionTool = fixedHasXml && HasActionToolCall(fixedResponse); @@ -1023,7 +1040,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "Output the XML tag only and NOTHING else.\n" + "\nLast user request:\n" + lastUserMessage; - string retryDecision = await client.GetChatCompletionAsync(retryInstruction, new List<(string role, string message)>(), maxTokens: 16, temperature: 0.1f); + string retryDecision = await client.GetChatCompletionAsync(retryInstruction, new List<(string role, string message)>(), maxTokens: 256, temperature: 0.1f); if (!string.IsNullOrEmpty(retryDecision) && ShouldRetryTools(retryDecision)) { if (Prefs.DevMode) @@ -1032,10 +1049,10 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori } SetThinkingPhase(2, true); - string retryActionInstruction = GetToolSystemInstruction(actionPhase) + + string retryActionInstruction = GetToolSystemInstruction(actionPhase, !string.IsNullOrEmpty(base64Image)) + "\n\n# RETRY\nYou chose to retry. 
Output XML tool calls only (or )."; var retryActionContext = BuildToolContext(actionPhase, includeUser: true); - string retryActionResponse = await client.GetChatCompletionAsync(retryActionInstruction, retryActionContext, maxTokens: 128, temperature: 0.1f); + string retryActionResponse = await client.GetChatCompletionAsync(retryActionInstruction, retryActionContext, maxTokens: 2048, temperature: 0.1f, base64Image: base64Image); if (string.IsNullOrEmpty(retryActionResponse)) { AddAssistantMessage("Wula_AI_Error_ConnectionLost".Translate()); @@ -1066,7 +1083,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "- String\n" + "- Float\n" + "\nPrevious output:\n" + TrimForPrompt(retryActionResponse, 600); - string retryFixedResponse = await client.GetChatCompletionAsync(retryFixInstruction, retryActionContext, maxTokens: 128, temperature: 0.1f); + string retryFixedResponse = await client.GetChatCompletionAsync(retryFixInstruction, retryActionContext, maxTokens: 2048, temperature: 0.1f); bool retryFixedHasXml = !string.IsNullOrEmpty(retryFixedResponse) && IsXmlToolCall(retryFixedResponse); bool retryFixedIsNoActionOnly = retryFixedHasXml && IsNoActionOnly(retryFixedResponse); bool retryFixedHasActionTool = retryFixedHasXml && HasActionToolCall(retryFixedResponse); @@ -1211,6 +1228,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori var nonActionToolsInActionPhase = new List(); StringBuilder combinedResults = new StringBuilder(); StringBuilder xmlOnlyBuilder = new StringBuilder(); + string capturedImageForPhase = null; bool countActionSuccessOnly = phase == RequestPhase.ActionTools; @@ -1231,6 +1249,18 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori continue; } + if (toolName.Equals("analyze_screen", StringComparison.OrdinalIgnoreCase) || toolName.Equals("capture_screen", StringComparison.OrdinalIgnoreCase)) + { + // Intercept Vision Request: Capture screen and return it. + // We skip the tool's internal execution to save time/tokens, as the purpose is just to get the image into the context. + capturedImageForPhase = ScreenCaptureUtility.CaptureScreenAsBase64(); + combinedResults.AppendLine($"Tool '{toolName}' Result: Screen captured successfully. Context updated for next phase."); + successfulToolCall = true; + successfulTools.Add(toolName); + executed++; + continue; + } + if (xmlOnlyBuilder.Length > 0) xmlOnlyBuilder.AppendLine().AppendLine(); xmlOnlyBuilder.Append(toolCallXml); @@ -1344,7 +1374,8 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori { AnyToolSuccess = successfulToolCall, AnyActionSuccess = successfulActions.Count > 0, - AnyActionError = failedActions.Count > 0 + AnyActionError = failedActions.Count > 0, + CapturedImage = capturedImageForPhase }; } diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Agent/AutonomousAgentLoop.cs b/Source/WulaFallenEmpire/EventSystem/AI/Agent/AutonomousAgentLoop.cs index ff4804bd..519b5105 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Agent/AutonomousAgentLoop.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Agent/AutonomousAgentLoop.cs @@ -130,8 +130,9 @@ namespace WulaFallenEmpire.EventSystem.AI.Agent string decision; string base64Image = null; - // 如果启用了视觉特性且开启了原生多模态,则在决策前截图 - if (settings.enableVlmFeatures && settings.useNativeMultimodal) + // 如果启用了视觉特性,则在决策前截图 (Autonomous Loop 默认认为是开启视觉即全自动,或者我们可以加逻辑判断,但暂时保持 VLM 开启即截图对于 Agent Loop 来说更合理,因为它需要时刻观察) + // 实际上,Agent Loop 通常需要全视觉,所以我们这里只检查 enableVlmFeatures + if (settings.enableVlmFeatures) { base64Image = ScreenCaptureUtility.CaptureScreenAsBase64(); if (settings.showThinkingProcess) diff --git 
a/Source/WulaFallenEmpire/EventSystem/AI/SimpleAIClient.cs b/Source/WulaFallenEmpire/EventSystem/AI/SimpleAIClient.cs index ce493d82..c6345057 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/SimpleAIClient.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/SimpleAIClient.cs @@ -6,6 +6,7 @@ using UnityEngine.Networking; using UnityEngine; using Verse; using System.Linq; +using System.Text.RegularExpressions; namespace WulaFallenEmpire.EventSystem.AI { @@ -27,12 +28,21 @@ namespace WulaFallenEmpire.EventSystem.AI public async Task GetChatCompletionAsync(string instruction, List<(string role, string message)> messages, int? maxTokens = null, float? temperature = null, string base64Image = null) { + // 1. Gemini Mode if (_useGemini) { - return await GetGeminiCompletionAsync(instruction, messages, maxTokens, temperature, base64Image); + string geminiResponse = await GetGeminiCompletionAsync(instruction, messages, maxTokens, temperature, base64Image); + + // Fallback: If failed and had image, retry without image + if (geminiResponse == null && !string.IsNullOrEmpty(base64Image)) + { + WulaLog.Debug("[WulaAI] [WARNING] Visual request failed (likely model incompatible). Retrying text-only..."); + return await GetGeminiCompletionAsync(instruction, messages, maxTokens, temperature, null); + } + return geminiResponse; } - // OpenAI / Compatible Mode + // 2. OpenAI / Compatible Mode if (string.IsNullOrEmpty(_baseUrl)) { WulaLog.Debug("[WulaAI] Base URL is missing."); @@ -83,11 +93,27 @@ namespace WulaFallenEmpire.EventSystem.AI } jsonBuilder.Append("]}"); - return await SendRequestAsync(endpoint, jsonBuilder.ToString(), _apiKey); + string response = await SendRequestAsync(endpoint, jsonBuilder.ToString(), _apiKey); + + // Fallback: If failed and had image, retry without image + if (response == null && !string.IsNullOrEmpty(base64Image)) + { + WulaLog.Debug("[WulaAI] [WARNING] Visual request failed (likely model incompatible). 
Retrying text-only..."); + return await GetChatCompletionAsync(instruction, messages, maxTokens, temperature, null); + } + return response; } private async Task GetGeminiCompletionAsync(string instruction, List<(string role, string message)> messages, int? maxTokens = null, float? temperature = null, string base64Image = null) { + // Ensure messages is not empty to avoid Gemini 400 Error (Invalid Argument) + if (messages == null) messages = new List<(string role, string message)>(); + if (messages.Count == 0) + { + // Gemini API 'contents' cannot be empty. We add a dummy prompt to trigger the model. + messages.Add(("user", "Start.")); + } + // Gemini API URL string baseUrl = _baseUrl; if (string.IsNullOrEmpty(baseUrl) || !baseUrl.Contains("googleapis.com")) @@ -141,7 +167,17 @@ namespace WulaFallenEmpire.EventSystem.AI { if (Prefs.DevMode) { - WulaLog.Debug($"[WulaAI] Sending request to {endpoint}"); + string logUrl = endpoint; + if (logUrl.Contains("key=")) + { + logUrl = Regex.Replace(logUrl, @"key=[^&]*", "key=[REDACTED]"); + } + WulaLog.Debug($"[WulaAI] Sending request to {logUrl}"); + + // Log request body (truncated to avoid spamming base64) + string logBody = jsonBody; + if (logBody.Length > 3000) logBody = logBody.Substring(0, 3000) + "... 
[Truncated]"; + WulaLog.Debug($"[WulaAI] Request Payload:\n{logBody}"); } using (UnityWebRequest request = new UnityWebRequest(endpoint, "POST")) @@ -167,6 +203,10 @@ namespace WulaFallenEmpire.EventSystem.AI } string response = request.downloadHandler.text; + if (Prefs.DevMode) + { + WulaLog.Debug($"[WulaAI] Response Body:\n{TruncateForLog(response)}"); + } return ExtractContent(response); } } @@ -268,7 +308,38 @@ namespace WulaFallenEmpire.EventSystem.AI else if (c == 't') sb.Append('\t'); else if (c == '"') sb.Append('"'); else if (c == '\\') sb.Append('\\'); - else sb.Append(c); + else if (c == 'u') + { + // Handle Unicode escape sequence \uXXXX + if (i + 4 < json.Length) + { + string hex = json.Substring(i + 1, 4); + if (int.TryParse(hex, System.Globalization.NumberStyles.HexNumber, System.Globalization.CultureInfo.InvariantCulture, out int charCode)) + { + sb.Append((char)charCode); + i += 4; + } + else + { + // Fallback if parsing fails + sb.Append("\\u"); + sb.Append(hex); + i += 4; + } + } + else + { + sb.Append("\\u"); + } + } + else if (c == '/') + { + sb.Append('/'); + } + else + { + sb.Append(c); + } escaped = false; } else diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetColonistStatus.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetColonistStatus.cs index 9e50294b..5c070219 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetColonistStatus.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetColonistStatus.cs @@ -132,7 +132,8 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools if (!showAllNeeds && !isLow) continue; string marker = isLow ? "!" 
: ""; - sb.Append($"{marker}{need.LabelCap} ({need.CurLevelPercentage:P0})"); + // Add explicit polarity to guide AI interpretation + sb.Append($"{marker}{need.LabelCap}: {need.CurLevelPercentage:P0} (Higher is Better)"); if (Prefs.DevMode && need.def != null) { sb.Append($"[{need.def.defName}]"); diff --git a/Source/WulaFallenEmpire/WulaFallenEmpireMod.cs b/Source/WulaFallenEmpire/WulaFallenEmpireMod.cs index 09244f86..32759aed 100644 --- a/Source/WulaFallenEmpire/WulaFallenEmpireMod.cs +++ b/Source/WulaFallenEmpire/WulaFallenEmpireMod.cs @@ -96,7 +96,6 @@ namespace WulaFallenEmpire if (settings.enableVlmFeatures) { - listingStandard.CheckboxLabeled("优先使用原生多模态模式", ref settings.useNativeMultimodal, "直接在思考阶段将截图发送给主模型(推荐,速度更快,需模型支持视角)"); listingStandard.CheckboxLabeled("在 UI 中显示中间思考过程", ref settings.showThinkingProcess, "显示 AI 执行工具时的状态反馈"); } diff --git a/Source/WulaFallenEmpire/WulaFallenEmpireSettings.cs b/Source/WulaFallenEmpire/WulaFallenEmpireSettings.cs index 6d68d269..04a39b11 100644 --- a/Source/WulaFallenEmpire/WulaFallenEmpireSettings.cs +++ b/Source/WulaFallenEmpire/WulaFallenEmpireSettings.cs @@ -19,7 +19,6 @@ namespace WulaFallenEmpire // 视觉功能配置 public bool enableVlmFeatures = false; - public bool useNativeMultimodal = true; // 默认启用原生多模态 public bool showThinkingProcess = true; // 是否显示中间思考过过程 public override void ExposeData() @@ -38,7 +37,6 @@ namespace WulaFallenEmpire // 简化后的视觉配置 Scribe_Values.Look(ref enableVlmFeatures, "enableVlmFeatures", false); - Scribe_Values.Look(ref useNativeMultimodal, "useNativeMultimodal", true); Scribe_Values.Look(ref showThinkingProcess, "showThinkingProcess", true); base.ExposeData(); diff --git a/Tools/mimo-v2-flash.txt b/Tools/mimo-v2-flash.txt new file mode 100644 index 00000000..e2148c53 --- /dev/null +++ b/Tools/mimo-v2-flash.txt @@ -0,0 +1,3 @@ +https://api.xiaomimimo.com/v1 +mimo-v2-flash +sk-REDACTED-ROTATE-THIS-KEY \ No newline at end of file