diff --git a/1.6/1.6/Assemblies/WulaFallenEmpire.dll b/1.6/1.6/Assemblies/WulaFallenEmpire.dll index 71358ee2..dee74753 100644 Binary files a/1.6/1.6/Assemblies/WulaFallenEmpire.dll and b/1.6/1.6/Assemblies/WulaFallenEmpire.dll differ diff --git a/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs b/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs index 64492290..027cdf3d 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Collections.Generic; using System.Linq; using System.Text; @@ -248,7 +248,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori return; } - // 附加选中对象的上下文信息 + // ѡжϢ string messageWithContext = BuildUserMessageWithContext(text); _history.Add(("user", messageWithContext)); PersistHistory(); @@ -323,7 +323,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori _tools.Add(new Tool_SearchThingDef()); _tools.Add(new Tool_SearchPawnKind()); - // Agent 工具 - 纯视觉操作 (移除了 GetGameState, DesignateMine, DraftPawn) + // Agent - Ӿ (Ƴ GetGameState, DesignateMine, DraftPawn) if (WulaFallenEmpireMod.settings?.enableVlmFeatures == true) { _tools.Add(new Tool_AnalyzeScreen()); @@ -498,7 +498,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori string actionWhitelist = phase == RequestPhase.ActionTools ? "ACTION PHASE VALID TAGS ONLY:\n" + ", , , , , , , , , , , \n" + - "INVALID EXAMPLES (do NOT use now): , , \n" + "INVALID EXAMPLES (do NOT use now): , JSON, Markdown Code Blocks\n" : string.Empty; return string.Join("\n\n", new[] @@ -843,7 +843,7 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori { if (string.IsNullOrEmpty(message)) return false; string[] keywords = new string[] { - "屏幕", "画面", "截图", "看", "找", "显示", // CN + "Ļ", "", "ͼ", "", "", "ʾ", // CN "screen", "screenshot", "image", "view", "look", "see", "find", "visual", "scan" // EN }; return keywords.Any(k => message.IndexOf(k, StringComparison.OrdinalIgnoreCase) >= 0); @@ -880,7 +880,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori if (settings.enableVlmFeatures && settings.showThinkingProcess) { // Optional: We can still say "Analyzing data link..." - AddAssistantMessage("[P.I.A] 正在分析数据链路..."); + AddAssistantMessage("[P.I.A] ڷ·..."); } var queryPhase = RequestPhase.QueryTools; @@ -914,7 +914,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori base64Image = queryResult.CapturedImage; if (settings.showThinkingProcess) { - AddAssistantMessage("[P.I.A] 视觉传感器已激活,图像已捕获..."); + AddAssistantMessage("[P.I.A] ӾѼͼѲ..."); } } @@ -965,7 +965,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori if (settings.showThinkingProcess) { - AddAssistantMessage("[P.I.A] 正在计算最优战术方案..."); + AddAssistantMessage("[P.I.A] ڼս..."); } var actionPhase = RequestPhase.ActionTools; @@ -998,7 +998,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "Preserve the intent of the previous output.\n" + "If the previous output indicates no action is needed or refuses action, output exactly: .\n" + "Do NOT invent new actions.\n" + - "Output VALID XML tool calls only. No natural language, no commentary.\n" + + "Output VALID XML tool calls only. 
No natural language, no commentary.\nIf the previous output contains JSON (including JSON code blocks or inline {\"point\": [x,y]}), convert it to XML.\n- For a point coordinate, output: .......\n- If coordinates are larger than 1 (e.g., 0-1000), normalize by dividing by 1000.\nIgnore any non-XML text.\n" + "Allowed tags: , , , , , , , .\n" + "\nAction tool XML formats:\n" + "- DefNameInt\n" + @@ -1073,7 +1073,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "Preserve the intent of the previous output.\n" + "If the previous output indicates no action is needed or refuses action, output exactly: .\n" + "Do NOT invent new actions.\n" + - "Output VALID XML tool calls only. No natural language, no commentary.\n" + + "Output VALID XML tool calls only. No natural language, no commentary.\nIf the previous output contains JSON (including JSON code blocks or inline {\"point\": [x,y]}), convert it to XML.\n- For a point coordinate, output: .......\n- If coordinates are larger than 1 (e.g., 0-1000), normalize by dividing by 1000.\nIgnore any non-XML text.\n" + "Allowed tags: , , , , , , , .\n" + "\nAction tool XML formats:\n" + "- DefNameInt\n" + @@ -1151,7 +1151,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori if (settings.showThinkingProcess) { - AddAssistantMessage("[P.I.A] 正在汇总战报并建立通讯记录..."); + AddAssistantMessage("[P.I.A] ڻսͨѶ¼..."); } // VISUAL CONTEXT FOR REPLY: Pass the image so the AI can describe what it sees. @@ -1212,6 +1212,53 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori string guidance = "ToolRunner Guidance: Reply to the player in natural language only. Do NOT output any XML. You may include [EXPR:n] to set expression (n=1-6)."; var matches = Regex.Matches(xml ?? 
"", @"<([a-zA-Z0-9_]+)(?:>.*?|/>)", RegexOptions.Singleline); + + // GEMINI 3 JSON FALLBACK (Restored & Fixed) + // If no XML found, try to parse JSON markdown block commonly output by Gemini 3 Flash Preview + // Uses a 0-1000 coordinate system typically. + if (matches.Count == 0 && (xml ?? "").Contains("```json")) + { + try + { + var jsonMatch = Regex.Match(xml, @"```json\s*(\[.*?\])\s*```", RegexOptions.Singleline); + if (jsonMatch.Success) + { + string jsonArr = jsonMatch.Groups[1].Value; + // Regex to extract objects with "point" (and optional "action") + // Matches: { "point": [123, 456] } + // Note: In verbatim string @"""", double quotes are escaped as "". Not \". + var pointMatches = Regex.Matches(jsonArr, @"\{.*?\""point\""\s*:\s*\[\s*([\d\.]+)\s*,\s*([\d\.]+)\s*\].*?\}", RegexOptions.Singleline); + + if (pointMatches.Count > 0) + { + StringBuilder synthesizedXml = new StringBuilder(); + foreach (Match pm in pointMatches) + { + if (float.TryParse(pm.Groups[1].Value, out float xVal) && float.TryParse(pm.Groups[2].Value, out float yVal)) + { + // Gemini uses 0-1000 scale usually + if (xVal > 1 || yVal > 1) + { + xVal /= 1000.0f; + yVal /= 1000.0f; + } + synthesizedXml.Append($"{xVal}{yVal}"); + } + } + if (synthesizedXml.Length > 0) + { + xml = synthesizedXml.ToString() + "\n" + xml; // Prepend to process downstream + matches = Regex.Matches(xml, @"<([a-zA-Z0-9_]+)(?:>.*?|/>)", RegexOptions.Singleline); + } + } + } + } + catch (Exception ex) + { + WulaLog.Debug($"[JSON Fallback Error] {ex.Message}"); + } + } + if (matches.Count == 0 || (matches.Count == 1 && matches[0].Groups[1].Value.Equals("no_action", StringComparison.OrdinalIgnoreCase))) { UpdatePhaseToolLedger(phase, false, new List()); @@ -1455,3 +1502,4 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori } } } + diff --git a/Source/WulaFallenEmpire/WulaAI_DevDocs/WulaAI_Gemini_Integration_Handover.md b/Source/WulaFallenEmpire/WulaAI_DevDocs/WulaAI_Gemini_Integration_Handover.md new file mode 100644 index 00000000..4f41eed2 --- /dev/null +++ b/Source/WulaFallenEmpire/WulaAI_DevDocs/WulaAI_Gemini_Integration_Handover.md @@ -0,0 +1,83 @@ +# Wula AI x Gemini Integration: Technical Handover Document + +**Version**: 1.0 +**Date**: 2025-12-28 +**Author**: AntiGravity (Agent) +**Target Audience**: Codex / Future Maintainers + +--- + +## 1. Overview +This document details the specific challenges, bugs, and architectural decisions made to stabilize the integration between **WulaFallenEmpire** (RimWorld Mod) and **Gemini 3 / OpenAI-Compatible Agents**. It specifically addresses "stubborn" issues related to API format compliance, JSON construction, and multimodal context persistence. + +--- + +## 2. Critical Issues & Fixes + +### 2.1 The "Streaming" Trap (SSE Handling) +**Symptoms**: AI responses were truncated (e.g., only "Comman" displayed instead of "Commander"). +**Root Cause**: Even when `stream: false` is explicitly requested in the payload, some API providers (or reverse proxies wrapping Gemini) force a **Server-Sent Events (SSE)** response format (`data: {...}`). The original client only parsed the first line. +**Fix Implementation**: +- **File**: `SimpleAIClient.cs` -> `ExtractContent` +- **Logic**: Inspects response for `data:` prefix. If found, it iterates through **ALL** lines, strips `data:`, parses individual JSON chunks, and aggregates the `choices[0].delta.content` into a single string. +- **Defense**: This ensures compatibility with both standard JSON responses and forced Stream responses. + +### 2.2 The "Trailing Comma" Crash (HTTP 400) +**Symptoms**: AI actions failed silently or returned `400 Bad Request`. +**Root Cause**: In `SimpleAIClient.cs`, the JSON payload construction loop had a logic flaw. 
+- When filtering out `toolcall` roles inside the loop, the index `i` check `(i < messages.Count - 1)` failed to account for skipped items, leaving a trailing comma after the last valid item: `[{"role":"user",...},]` -> **Invalid JSON**. +- Additionally, if the message list was empty (or all items were filtered out), the comma after the System Message remained: `[{"role":"system",...},]` -> **Invalid JSON**. +**Fix Implementation**: +- **Logic**: + 1. Pre-filter `validMessages` into a separate list **before** JSON construction. + 2. Only append the comma after the System Message `if (validMessages.Count > 0)`. + 3. Iterate `validMessages` to guarantee correct comma placement between items. + +### 2.3 Gemini 3's "JSON Obsession" & The Dual-Defense Strategy +**Symptoms**: Gemini 3 Flash Preview ignores System Prompts demanding XML tool-call tags and persistently outputs JSON (`[{"action":"click"...}]`). +**Root Cause**: RLHF tuning of newer models biases them heavily towards standard JSON tool-calling schemas, overriding prompt constraints. +**Strategy**: **"Principled Compromise"** (Dual Defense). +1. **Layer 1 (Prompt)**: Explicitly list JSON and Markdown as `INVALID EXAMPLES` in `AIIntelligenceCore.cs`. This discourages compliance-oriented models from using them. +2. **Layer 2 (Code Fallback)**: If the XML regex finds no matches, the system attempts to parse **Markdown JSON Blocks** (` ```json ... ``` `). + - **File**: `AIIntelligenceCore.cs` -> `ExecuteXmlToolsForPhase` + - **Logic**: Extracts `point` arrays `[x, y]` and synthesizes a valid XML tool-call tag internally. + +### 2.4 The Coordinate System Mess +**Symptoms**: Clicks occurred off-screen or at (0,0). +**Root Cause**: +- Gemini 3 often returns coordinates in a **0-1000** scale (e.g., `[115, 982]`). +- Previous logic used `Screen.width` normalization, which is **not thread-safe** and caused crashes, or scaled incorrectly because it assumed the model returned pixel coordinates. 
+**Fix Implementation**: +- **Logic**: In the JSON Fallback parser, if `x > 1` or `y > 1`, divide both `x` and `y` by **1000.0f**. This standardizes coordinates to the mod's required 0-1 proportional format. + +### 2.5 Visual Context Persistence (The "Blind Reply" Bug) +**Symptoms**: AI acted correctly (Phase 2) but "forgot" what it saw when replying to the user (Phase 3), or hallucinated headers. +**Root Cause**: +- Phase 3 (Reply) sends a message history ending with System Tool Results. +- `SimpleAIClient` only attached the image if the **very last message** was from `user`. +- Thus, in Phase 3, the image was dropped, rendering the AI blind. +**Fix Implementation**: +- **File**: `SimpleAIClient.cs` +- **Logic**: Instead of checking the last index, the code now searches **backwards** for the `lastUserIndex`. The image is attached to that specific user message, regardless of how many system messages follow it. + +--- + +## 3. Future Maintenance Guide + +### If Gemini 4 Breaks Format Again: +1. **Check `SimpleAIClient.cs`**: Ensure the JSON parser handles whatever new wrapper they add (e.g., nested `candidates`). +2. **Check `AIIntelligenceCore.cs`**: If it invents a new tool format (e.g., YAML), add a regex parser in `ExecuteXmlToolsForPhase` similar to the JSON Fallback. **Do not fight the model; adapt to it.** + +### If API Errors Return: +1. Enable `DevMode` in RimWorld. +2. Check `Player.log` for `[WulaAI] Request Payload`. +3. Copy the payload to a JSON Validator. **Look for trailing commas.** + +### Adding New Visual Tools: +1. Define the tool in `Tools/`. +2. Update the `GetToolSystemInstruction` whitelist. +3. **Crucially**: If the tool helps with **Action** (Silent), ensure `GetPhaseInstruction` enforces silence. If it helps with **Reply** (Descriptive), ensure it runs in Phase 3. + +--- + +**End of Handover.**