zc
This commit is contained in:
Binary file not shown.
@@ -1,4 +1,4 @@
|
|||||||
using System;
|
using System;
|
||||||
using System.Collections.Generic;
|
using System.Collections.Generic;
|
||||||
using System.Linq;
|
using System.Linq;
|
||||||
using System.Text;
|
using System.Text;
|
||||||
@@ -248,7 +248,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 附加选中对象的上下文信息
|
// 附加选中对象的上下文信息
|
||||||
string messageWithContext = BuildUserMessageWithContext(text);
|
string messageWithContext = BuildUserMessageWithContext(text);
|
||||||
_history.Add(("user", messageWithContext));
|
_history.Add(("user", messageWithContext));
|
||||||
PersistHistory();
|
PersistHistory();
|
||||||
@@ -323,7 +323,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
_tools.Add(new Tool_SearchThingDef());
|
_tools.Add(new Tool_SearchThingDef());
|
||||||
_tools.Add(new Tool_SearchPawnKind());
|
_tools.Add(new Tool_SearchPawnKind());
|
||||||
|
|
||||||
// Agent 工具 - 纯视觉操作 (移除了 GetGameState, DesignateMine, DraftPawn)
|
// Agent 工具 - 纯视觉操作 (移除了 GetGameState, DesignateMine, DraftPawn)
|
||||||
if (WulaFallenEmpireMod.settings?.enableVlmFeatures == true)
|
if (WulaFallenEmpireMod.settings?.enableVlmFeatures == true)
|
||||||
{
|
{
|
||||||
_tools.Add(new Tool_AnalyzeScreen());
|
_tools.Add(new Tool_AnalyzeScreen());
|
||||||
@@ -498,7 +498,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
string actionWhitelist = phase == RequestPhase.ActionTools
|
string actionWhitelist = phase == RequestPhase.ActionTools
|
||||||
? "ACTION PHASE VALID TAGS ONLY:\n" +
|
? "ACTION PHASE VALID TAGS ONLY:\n" +
|
||||||
"<spawn_resources>, <send_reinforcement>, <call_bombardment>, <modify_goodwill>, <visual_click>, <visual_scroll>, <visual_type_text>, <visual_drag>, <visual_hotkey>, <visual_wait>, <visual_delete_text>, <no_action/>\n" +
|
"<spawn_resources>, <send_reinforcement>, <call_bombardment>, <modify_goodwill>, <visual_click>, <visual_scroll>, <visual_type_text>, <visual_drag>, <visual_hotkey>, <visual_wait>, <visual_delete_text>, <no_action/>\n" +
|
||||||
"INVALID EXAMPLES (do NOT use now): <get_map_resources/>, <search_thing_def/>, <search_pawn_kind/>\n"
|
"INVALID EXAMPLES (do NOT use now): <get_map_resources/>, JSON, Markdown Code Blocks\n"
|
||||||
: string.Empty;
|
: string.Empty;
|
||||||
|
|
||||||
return string.Join("\n\n", new[]
|
return string.Join("\n\n", new[]
|
||||||
@@ -843,7 +843,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
{
|
{
|
||||||
if (string.IsNullOrEmpty(message)) return false;
|
if (string.IsNullOrEmpty(message)) return false;
|
||||||
string[] keywords = new string[] {
|
string[] keywords = new string[] {
|
||||||
"屏幕", "画面", "截图", "看", "找", "显示", // CN
|
"屏幕", "画面", "截图", "看", "找", "显示", // CN
|
||||||
"screen", "screenshot", "image", "view", "look", "see", "find", "visual", "scan" // EN
|
"screen", "screenshot", "image", "view", "look", "see", "find", "visual", "scan" // EN
|
||||||
};
|
};
|
||||||
return keywords.Any(k => message.IndexOf(k, StringComparison.OrdinalIgnoreCase) >= 0);
|
return keywords.Any(k => message.IndexOf(k, StringComparison.OrdinalIgnoreCase) >= 0);
|
||||||
@@ -880,7 +880,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
if (settings.enableVlmFeatures && settings.showThinkingProcess)
|
if (settings.enableVlmFeatures && settings.showThinkingProcess)
|
||||||
{
|
{
|
||||||
// Optional: We can still say "Analyzing data link..."
|
// Optional: We can still say "Analyzing data link..."
|
||||||
AddAssistantMessage("<i>[P.I.A] 正在分析数据链路...</i>");
|
AddAssistantMessage("<i>[P.I.A] 正在分析数据链路...</i>");
|
||||||
}
|
}
|
||||||
|
|
||||||
var queryPhase = RequestPhase.QueryTools;
|
var queryPhase = RequestPhase.QueryTools;
|
||||||
@@ -914,7 +914,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
base64Image = queryResult.CapturedImage;
|
base64Image = queryResult.CapturedImage;
|
||||||
if (settings.showThinkingProcess)
|
if (settings.showThinkingProcess)
|
||||||
{
|
{
|
||||||
AddAssistantMessage("<i>[P.I.A] 视觉传感器已激活,图像已捕获...</i>");
|
AddAssistantMessage("<i>[P.I.A] 视觉传感器已激活,图像已捕获...</i>");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -965,7 +965,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
|
|
||||||
if (settings.showThinkingProcess)
|
if (settings.showThinkingProcess)
|
||||||
{
|
{
|
||||||
AddAssistantMessage("<i>[P.I.A] 正在计算最优战术方案...</i>");
|
AddAssistantMessage("<i>[P.I.A] 正在计算最优战术方案...</i>");
|
||||||
}
|
}
|
||||||
|
|
||||||
var actionPhase = RequestPhase.ActionTools;
|
var actionPhase = RequestPhase.ActionTools;
|
||||||
@@ -998,7 +998,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
"Preserve the intent of the previous output.\n" +
|
"Preserve the intent of the previous output.\n" +
|
||||||
"If the previous output indicates no action is needed or refuses action, output exactly: <no_action/>.\n" +
|
"If the previous output indicates no action is needed or refuses action, output exactly: <no_action/>.\n" +
|
||||||
"Do NOT invent new actions.\n" +
|
"Do NOT invent new actions.\n" +
|
||||||
"Output VALID XML tool calls only. No natural language, no commentary.\n" +
|
"Output VALID XML tool calls only. No natural language, no commentary.\nIf the previous output contains JSON (including JSON code blocks or inline {\"point\": [x,y]}), convert it to XML.\n- For a point coordinate, output: <visual_click><x>...</x><y>...</y></visual_click>.\n- If coordinates are larger than 1 (e.g., 0-1000), normalize by dividing by 1000.\nIgnore any non-XML text.\n" +
|
||||||
"Allowed tags: <spawn_resources>, <send_reinforcement>, <call_bombardment>, <modify_goodwill>, <visual_click>, <visual_scroll>, <visual_type_text>, <no_action/>.\n" +
|
"Allowed tags: <spawn_resources>, <send_reinforcement>, <call_bombardment>, <modify_goodwill>, <visual_click>, <visual_scroll>, <visual_type_text>, <no_action/>.\n" +
|
||||||
"\nAction tool XML formats:\n" +
|
"\nAction tool XML formats:\n" +
|
||||||
"- <spawn_resources><items><item><name>DefName</name><count>Int</count></item></items></spawn_resources>\n" +
|
"- <spawn_resources><items><item><name>DefName</name><count>Int</count></item></items></spawn_resources>\n" +
|
||||||
@@ -1073,7 +1073,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
"Preserve the intent of the previous output.\n" +
|
"Preserve the intent of the previous output.\n" +
|
||||||
"If the previous output indicates no action is needed or refuses action, output exactly: <no_action/>.\n" +
|
"If the previous output indicates no action is needed or refuses action, output exactly: <no_action/>.\n" +
|
||||||
"Do NOT invent new actions.\n" +
|
"Do NOT invent new actions.\n" +
|
||||||
"Output VALID XML tool calls only. No natural language, no commentary.\n" +
|
"Output VALID XML tool calls only. No natural language, no commentary.\nIf the previous output contains JSON (including JSON code blocks or inline {\"point\": [x,y]}), convert it to XML.\n- For a point coordinate, output: <visual_click><x>...</x><y>...</y></visual_click>.\n- If coordinates are larger than 1 (e.g., 0-1000), normalize by dividing by 1000.\nIgnore any non-XML text.\n" +
|
||||||
"Allowed tags: <spawn_resources>, <send_reinforcement>, <call_bombardment>, <modify_goodwill>, <visual_click>, <visual_scroll>, <visual_type_text>, <no_action/>.\n" +
|
"Allowed tags: <spawn_resources>, <send_reinforcement>, <call_bombardment>, <modify_goodwill>, <visual_click>, <visual_scroll>, <visual_type_text>, <no_action/>.\n" +
|
||||||
"\nAction tool XML formats:\n" +
|
"\nAction tool XML formats:\n" +
|
||||||
"- <spawn_resources><items><item><name>DefName</name><count>Int</count></item></items></spawn_resources>\n" +
|
"- <spawn_resources><items><item><name>DefName</name><count>Int</count></item></items></spawn_resources>\n" +
|
||||||
@@ -1151,7 +1151,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
|
|
||||||
if (settings.showThinkingProcess)
|
if (settings.showThinkingProcess)
|
||||||
{
|
{
|
||||||
AddAssistantMessage("<i>[P.I.A] 正在汇总战报并建立通讯记录...</i>");
|
AddAssistantMessage("<i>[P.I.A] 正在汇总战报并建立通讯记录...</i>");
|
||||||
}
|
}
|
||||||
|
|
||||||
// VISUAL CONTEXT FOR REPLY: Pass the image so the AI can describe what it sees.
|
// VISUAL CONTEXT FOR REPLY: Pass the image so the AI can describe what it sees.
|
||||||
@@ -1212,6 +1212,53 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
string guidance = "ToolRunner Guidance: Reply to the player in natural language only. Do NOT output any XML. You may include [EXPR:n] to set expression (n=1-6).";
|
string guidance = "ToolRunner Guidance: Reply to the player in natural language only. Do NOT output any XML. You may include [EXPR:n] to set expression (n=1-6).";
|
||||||
|
|
||||||
var matches = Regex.Matches(xml ?? "", @"<([a-zA-Z0-9_]+)(?:>.*?</\1>|/>)", RegexOptions.Singleline);
|
var matches = Regex.Matches(xml ?? "", @"<([a-zA-Z0-9_]+)(?:>.*?</\1>|/>)", RegexOptions.Singleline);
|
||||||
|
|
||||||
|
// GEMINI 3 JSON FALLBACK (Restored & Fixed)
|
||||||
|
// If no XML found, try to parse JSON markdown block commonly output by Gemini 3 Flash Preview
|
||||||
|
// Uses a 0-1000 coordinate system typically.
|
||||||
|
if (matches.Count == 0 && (xml ?? "").Contains("```json"))
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
var jsonMatch = Regex.Match(xml, @"```json\s*(\[.*?\])\s*```", RegexOptions.Singleline);
|
||||||
|
if (jsonMatch.Success)
|
||||||
|
{
|
||||||
|
string jsonArr = jsonMatch.Groups[1].Value;
|
||||||
|
// Regex to extract objects with "point" (and optional "action")
|
||||||
|
// Matches: { "point": [123, 456] }
|
||||||
|
// Note: In verbatim string @"""", double quotes are escaped as "". Not \".
|
||||||
|
var pointMatches = Regex.Matches(jsonArr, @"\{.*?\""point\""\s*:\s*\[\s*([\d\.]+)\s*,\s*([\d\.]+)\s*\].*?\}", RegexOptions.Singleline);
|
||||||
|
|
||||||
|
if (pointMatches.Count > 0)
|
||||||
|
{
|
||||||
|
StringBuilder synthesizedXml = new StringBuilder();
|
||||||
|
foreach (Match pm in pointMatches)
|
||||||
|
{
|
||||||
|
if (float.TryParse(pm.Groups[1].Value, out float xVal) && float.TryParse(pm.Groups[2].Value, out float yVal))
|
||||||
|
{
|
||||||
|
// Gemini uses 0-1000 scale usually
|
||||||
|
if (xVal > 1 || yVal > 1)
|
||||||
|
{
|
||||||
|
xVal /= 1000.0f;
|
||||||
|
yVal /= 1000.0f;
|
||||||
|
}
|
||||||
|
synthesizedXml.Append($"<visual_click><x>{xVal}</x><y>{yVal}</y></visual_click>");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (synthesizedXml.Length > 0)
|
||||||
|
{
|
||||||
|
xml = synthesizedXml.ToString() + "\n" + xml; // Prepend to process downstream
|
||||||
|
matches = Regex.Matches(xml, @"<([a-zA-Z0-9_]+)(?:>.*?</\1>|/>)", RegexOptions.Singleline);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
WulaLog.Debug($"[JSON Fallback Error] {ex.Message}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (matches.Count == 0 || (matches.Count == 1 && matches[0].Groups[1].Value.Equals("no_action", StringComparison.OrdinalIgnoreCase)))
|
if (matches.Count == 0 || (matches.Count == 1 && matches[0].Groups[1].Value.Equals("no_action", StringComparison.OrdinalIgnoreCase)))
|
||||||
{
|
{
|
||||||
UpdatePhaseToolLedger(phase, false, new List<string>());
|
UpdatePhaseToolLedger(phase, false, new List<string>());
|
||||||
@@ -1455,3 +1502,4 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,83 @@
|
|||||||
|
# Wula AI x Gemini Integration: Technical Handover Document
|
||||||
|
|
||||||
|
**Version**: 1.0
|
||||||
|
**Date**: 2025-12-28
|
||||||
|
**Author**: AntiGravity (Agent)
|
||||||
|
**Target Audience**: Codex / Future Maintainers
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Overview
|
||||||
|
This document details the specific challenges and bugs encountered, and the architectural decisions made, while stabilizing the integration between **WulaFallenEmpire** (RimWorld Mod) and **Gemini 3 / OpenAI-Compatible Agents**. It specifically addresses "stubborn" issues related to API format compliance, JSON construction, and multimodal context persistence.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Critical Issues & Fixes
|
||||||
|
|
||||||
|
### 2.1 The "Streaming" Trap (SSE Handling)
|
||||||
|
**Symptoms**: AI responses were truncated (e.g., only "Comman" displayed instead of "Commander").
|
||||||
|
**Root Cause**: Even when `stream: false` is explicitly requested in the payload, some API providers (or reverse proxies wrapping Gemini) force a **Server-Sent Events (SSE)** response format (`data: {...}`). The original client only parsed the first line.
|
||||||
|
**Fix Implementation**:
|
||||||
|
- **File**: `SimpleAIClient.cs` -> `ExtractContent`
|
||||||
|
- **Logic**: Inspects response for `data:` prefix. If found, it iterates through **ALL** lines, strips `data:`, parses individual JSON chunks, and aggregates the `choices[0].delta.content` into a single string.
|
||||||
|
- **Defense**: This ensures compatibility with both standard JSON responses and forced Stream responses.
|
||||||
|
|
||||||
|
### 2.2 The "Trailing Comma" Crash (HTTP 400)
|
||||||
|
**Symptoms**: AI actions failed silently or returned `400 Bad Request`.
|
||||||
|
**Root Cause**: In `SimpleAIClient.cs`, the JSON payload construction loop had a logic flaw.
|
||||||
|
- When `toolcall` roles were filtered out inside the loop, the index check `(i < messages.Count - 1)` did not account for the skipped items, leaving a trailing comma after the last valid item: `[{"role":"user",...},]` -> **Invalid JSON**.
|
||||||
|
- Additionally, if the message list was empty (or all items filtered), the comma after the System Message remained: `[{"role":"system",...},]` -> **Invalid JSON**.
|
||||||
|
**Fix Implementation**:
|
||||||
|
- **Logic**:
|
||||||
|
1. Pre-filter `validMessages` into a separate list **before** JSON construction.
|
||||||
|
2. Only append the comma after the System Message `if (validMessages.Count > 0)`.
|
||||||
|
3. Iterate `validMessages` to guarantee correct comma placement between items.
|
||||||
|
|
||||||
|
### 2.3 Gemini 3's "JSON Obsession" & The Dual-Defense Strategy
|
||||||
|
**Symptoms**: Gemini 3 Flash Preview ignores System Prompts demanding XML (`<visual_click>`) and persistently outputs JSON (`[{"action":"click"...}]`).
|
||||||
|
**Root Cause**: RLHF tuning of newer models biases them heavily towards standard JSON tool-calling schemas, overriding prompt constraints.
|
||||||
|
**Strategy**: **"Principled Compromise"** (Double Defense).
|
||||||
|
1. **Layer 1 (Prompt)**: Explicitly list JSON and Markdown as `INVALID EXAMPLES` in `AIIntelligenceCore.cs`. This discourages compliance-oriented models from using them.
|
||||||
|
2. **Layer 2 (Code Fallback)**: If XML regex fails, the system attempts to parse **Markdown JSON Blocks** (` ```json ... ``` `).
|
||||||
|
- **File**: `AIIntelligenceCore.cs` -> `ExecuteXmlToolsForPhase`
|
||||||
|
- **Logic**: Extracts `point` arrays `[x, y]` and synthesizes a valid `<visual_click>` XML tag internally.
|
||||||
|
|
||||||
|
### 2.4 The Coordinate System Mess
|
||||||
|
**Symptoms**: Clicks occurred off-screen or at (0,0).
|
||||||
|
**Root Cause**:
|
||||||
|
- Gemini 3 often returns coordinates in a **0-1000** scale (e.g., `[115, 982]`).
|
||||||
|
- Previous logic normalized by `Screen.width`, which is **not thread-safe** and caused crashes; it also assumed the model returned pixel coordinates, so values on any other scale were scaled incorrectly.
|
||||||
|
**Fix Implementation**:
|
||||||
|
- **Logic**: In the JSON Fallback parser, if `x > 1` or `y > 1`, both values are divided by **1000.0f**. This standardizes coordinates to the mod's required 0-1 proportional format.
|
||||||
|
|
||||||
|
### 2.5 Visual Context Persistence (The "Blind Reply" Bug)
|
||||||
|
**Symptoms**: AI acted correctly (Phase 2) but "forgot" what it saw when replying to the user (Phase 3), or hallucinated headers.
|
||||||
|
**Root Cause**:
|
||||||
|
- Phase 3 (Reply) sends a message history ending with System Tool Results.
|
||||||
|
- `SimpleAIClient` only attached the image if the **very last message** was from `user`.
|
||||||
|
- Thus, in Phase 3, the image was dropped, rendering the AI blind.
|
||||||
|
**Fix Implementation**:
|
||||||
|
- **File**: `SimpleAIClient.cs`
|
||||||
|
- **Logic**: Instead of checking the last index, the code now searches **backwards** for the `lastUserIndex`. The image is attached to that specific user message, regardless of how many system messages follow it.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Future Maintenance Guide
|
||||||
|
|
||||||
|
### If Gemini 4 Breaks Format Again:
|
||||||
|
1. **Check `SimpleAIClient.cs`**: Ensure the JSON parser handles whatever new wrapper they add (e.g., nested `candidates`).
|
||||||
|
2. **Check `AIIntelligenceCore.cs`**: If it invents a new tool format (e.g., YAML), add a regex parser in `ExecuteXmlToolsForPhase` similar to the JSON Fallback. **Do not fight the model; adapt to it.**
|
||||||
|
|
||||||
|
### If API Errors Return:
|
||||||
|
1. Enable `DevMode` in RimWorld.
|
||||||
|
2. Check `Player.log` for `[WulaAI] Request Payload`.
|
||||||
|
3. Copy the payload to a JSON Validator. **Look for trailing commas.**
|
||||||
|
|
||||||
|
### Adding New Visual Tools:
|
||||||
|
1. Define tool in `Tools/`.
|
||||||
|
2. Update `GetToolSystemInstruction` whitelist.
|
||||||
|
3. **Crucially**: If the tool helps with **Action** (Silent), ensure `GetPhaseInstruction` enforces silence. If it helps with **Reply** (Descriptive), ensure it runs in Phase 3.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**End of Handover.**
|
||||||
Reference in New Issue
Block a user