diff --git a/1.6/1.6/Assemblies/WulaFallenEmpire.dll b/1.6/1.6/Assemblies/WulaFallenEmpire.dll index 32333cdc..69678215 100644 Binary files a/1.6/1.6/Assemblies/WulaFallenEmpire.dll and b/1.6/1.6/Assemblies/WulaFallenEmpire.dll differ diff --git a/1.6/1.6/Assemblies/WulaFallenEmpire.pdb b/1.6/1.6/Assemblies/WulaFallenEmpire.pdb index c2f038ab..616a8f91 100644 Binary files a/1.6/1.6/Assemblies/WulaFallenEmpire.pdb and b/1.6/1.6/Assemblies/WulaFallenEmpire.pdb differ diff --git a/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs b/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs index cacbd290..cda004f1 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs @@ -49,6 +49,7 @@ namespace WulaFallenEmpire.EventSystem.AI private readonly HashSet _actionSuccessLedgerSet = new HashSet(StringComparer.OrdinalIgnoreCase); private readonly List _actionFailedLedger = new List(); private readonly HashSet _actionFailedLedgerSet = new HashSet(StringComparer.OrdinalIgnoreCase); + private SimpleAIClient _currentClient; private const int DefaultMaxHistoryTokens = 100000; private const int CharsPerToken = 4; @@ -483,6 +484,12 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "If no action is required, output exactly: .\n" + "Query tools exist but are disabled in this phase (not listed here).\n" : string.Empty; + + if (WulaFallenEmpireMod.settings?.enableVlmFeatures == true && WulaFallenEmpireMod.settings?.useNativeMultimodal == true) + { + phaseInstruction += "\n- NATIVE MULTIMODAL: A current screenshot of the game is attached to this request. You can see the game state directly. Use it to determine coordinates for visual tools or to understand the context."; + } + string actionWhitelist = phase == RequestPhase.ActionTools ? "ACTION PHASE VALID TAGS ONLY:\n" + ", , , , , , , , , , , \n" + @@ -844,7 +851,23 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori return; } - var client = new SimpleAIClient(settings.apiKey, settings.baseUrl, settings.model); + var client = new SimpleAIClient(settings.apiKey, settings.baseUrl, settings.model, settings.useGeminiProtocol); + _currentClient = client; + + // 只有当启用了 VLM 特性,且开启了原生多模态模式时,才截图并在请求中包含图片 + string base64Image = null; + if (settings.enableVlmFeatures && settings.useNativeMultimodal) + { + base64Image = ScreenCaptureUtility.CaptureScreenAsBase64(); + if (settings.showThinkingProcess) + { + AddAssistantMessage("[P.I.A] 正在扫描当前战区情况..."); + } + } + else if (settings.showThinkingProcess) + { + AddAssistantMessage("[P.I.A] 正在分析数据链路..."); + } var queryPhase = RequestPhase.QueryTools; if (Prefs.DevMode) @@ -853,7 +876,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori } string queryInstruction = GetToolSystemInstruction(queryPhase); - string queryResponse = await client.GetChatCompletionAsync(queryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f); + string queryResponse = await client.GetChatCompletionAsync(queryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f, base64Image: base64Image); if (string.IsNullOrEmpty(queryResponse)) { AddAssistantMessage("Wula_AI_Error_ConnectionLost".Translate()); @@ -896,7 +919,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori SetThinkingPhase(1, true); string retryQueryInstruction = GetToolSystemInstruction(queryPhase) + "\n\n# RETRY\nYou chose to retry. Output XML tool calls only (or )."; - string retryQueryResponse = await client.GetChatCompletionAsync(retryQueryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f); + string retryQueryResponse = await client.GetChatCompletionAsync(retryQueryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f, base64Image: base64Image); if (string.IsNullOrEmpty(retryQueryResponse)) { AddAssistantMessage("Wula_AI_Error_ConnectionLost".Translate()); @@ -916,6 +939,11 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori } } + if (settings.showThinkingProcess) + { + AddAssistantMessage("[P.I.A] 正在计算最优战术方案..."); + } + var actionPhase = RequestPhase.ActionTools; if (Prefs.DevMode) { @@ -1096,6 +1124,11 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori } } + if (settings.showThinkingProcess) + { + AddAssistantMessage("[P.I.A] 正在汇总战报并建立通讯记录..."); + } + string reply = await client.GetChatCompletionAsync(replyInstruction, BuildReplyHistory()); if (string.IsNullOrEmpty(reply)) { diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Agent/AutonomousAgentLoop.cs b/Source/WulaFallenEmpire/EventSystem/AI/Agent/AutonomousAgentLoop.cs index 454e5410..698df714 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Agent/AutonomousAgentLoop.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Agent/AutonomousAgentLoop.cs @@ -121,32 +121,31 @@ namespace WulaFallenEmpire.EventSystem.AI.Agent return; } - var client = new SimpleAIClient(settings.apiKey, settings.baseUrl, settings.model); + var client = new SimpleAIClient(settings.apiKey, settings.baseUrl, settings.model, settings.useGeminiProtocol); - // 使用 VLM 如果启用且配置了 string decision; - if (settings.enableVlmFeatures && !string.IsNullOrEmpty(settings.vlmApiKey)) + string base64Image = null; + + // 如果启用了视觉特性且开启了原生多模态,则在决策前截图 + if (settings.enableVlmFeatures && settings.useNativeMultimodal) { - // 使用 VLM 分析屏幕 - string base64Image = ScreenCaptureUtility.CaptureScreenAsBase64(); - var vlmClient = new SimpleAIClient(settings.vlmApiKey, settings.vlmBaseUrl, settings.vlmModel); - decision = await vlmClient.GetVisionCompletionAsync( - GetAgentSystemPrompt(), - prompt, - base64Image, - maxTokens: 512, - temperature: 0.3f - ); - } - else - { - // 纯文本模式 - var messages = new List<(string role, string message)> + base64Image = ScreenCaptureUtility.CaptureScreenAsBase64(); + if (settings.showThinkingProcess) { - ("user", prompt) - }; - decision = await client.GetChatCompletionAsync(GetAgentSystemPrompt(), messages, 512, 0.3f); + Messages.Message("AI Agent: 正在通过视觉传感器分析实地情况...", MessageTypeDefOf.NeutralEvent); + } } + else if (settings.showThinkingProcess) + { + Messages.Message("AI Agent: 正在分析传感器遥测数据...", MessageTypeDefOf.NeutralEvent); + } + + // 直接调用 GetChatCompletionAsync (它已支持 multimodal 参数) + var messages = new List<(string role, string message)> + { + ("user", prompt) + }; + decision = await client.GetChatCompletionAsync(GetAgentSystemPrompt(), messages, 512, 0.3f, base64Image: base64Image); if (string.IsNullOrEmpty(decision)) { diff --git a/Source/WulaFallenEmpire/EventSystem/AI/SimpleAIClient.cs b/Source/WulaFallenEmpire/EventSystem/AI/SimpleAIClient.cs index c2c4dd72..ce493d82 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/SimpleAIClient.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/SimpleAIClient.cs @@ -5,6 +5,7 @@ using System.Threading.Tasks; using UnityEngine.Networking; using UnityEngine; using Verse; +using System.Linq; namespace WulaFallenEmpire.EventSystem.AI { @@ -13,17 +14,25 @@ namespace WulaFallenEmpire.EventSystem.AI private readonly string _apiKey; private readonly string _baseUrl; private readonly string _model; + private readonly bool _useGemini; private const int MaxLogChars = 2000; - public SimpleAIClient(string apiKey, string baseUrl, string model) + public SimpleAIClient(string apiKey, string baseUrl, string model, bool useGemini = false) { _apiKey = apiKey; _baseUrl = baseUrl?.TrimEnd('/'); _model = model; + _useGemini = useGemini; } - public async Task GetChatCompletionAsync(string instruction, List<(string role, string message)> messages, int? maxTokens = null, float? temperature = null) + public async Task GetChatCompletionAsync(string instruction, List<(string role, string message)> messages, int? maxTokens = null, float? temperature = null, string base64Image = null) { + if (_useGemini) + { + return await GetGeminiCompletionAsync(instruction, messages, maxTokens, temperature, base64Image); + } + + // OpenAI / Compatible Mode if (string.IsNullOrEmpty(_baseUrl)) { WulaLog.Debug("[WulaAI] Base URL is missing."); @@ -31,57 +40,108 @@ namespace WulaFallenEmpire.EventSystem.AI } string endpoint = $"{_baseUrl}/chat/completions"; - // Handle cases where baseUrl already includes /v1 or full path if (_baseUrl.EndsWith("/chat/completions")) endpoint = _baseUrl; else if (!_baseUrl.EndsWith("/v1")) endpoint = $"{_baseUrl}/v1/chat/completions"; - // Build JSON manually to avoid dependencies StringBuilder jsonBuilder = new StringBuilder(); jsonBuilder.Append("{"); jsonBuilder.Append($"\"model\": \"{_model}\","); - jsonBuilder.Append("\"stream\": false,"); // We request non-stream, but handle stream if returned - if (maxTokens.HasValue) - { - jsonBuilder.Append($"\"max_tokens\": {Math.Max(1, maxTokens.Value)},"); - } - if (temperature.HasValue) - { - float clamped = Mathf.Clamp(temperature.Value, 0f, 2f); - jsonBuilder.Append($"\"temperature\": {clamped.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)},"); - } - jsonBuilder.Append("\"messages\": ["); + jsonBuilder.Append("\"stream\": false,"); + if (maxTokens.HasValue) jsonBuilder.Append($"\"max_tokens\": {Math.Max(1, maxTokens.Value)},"); + if (temperature.HasValue) jsonBuilder.Append($"\"temperature\": {temperature.Value.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)},"); - // System instruction - bool firstMessage = true; + jsonBuilder.Append("\"messages\": ["); if (!string.IsNullOrEmpty(instruction)) { - jsonBuilder.Append($"{{\"role\": \"system\", \"content\": \"{EscapeJson(instruction)}\"}}"); - firstMessage = false; + jsonBuilder.Append($"{{\"role\": \"system\", \"content\": \"{EscapeJson(instruction)}\"}},"); } - // Messages for (int i = 0; i < messages.Count; i++) { var msg = messages[i]; string role = (msg.role ?? "user").ToLowerInvariant(); - if (role == "ai") role = "assistant"; - else if (role == "tool") role = "system"; // Internal-only role; map to supported role for Chat Completions APIs. + if (role == "ai" || role == "assistant") role = "assistant"; + else if (role == "tool") role = "system"; else if (role == "toolcall") continue; - else if (role != "system" && role != "user" && role != "assistant") role = "user"; - if (!firstMessage) jsonBuilder.Append(","); - jsonBuilder.Append($"{{\"role\": \"{role}\", \"content\": \"{EscapeJson(msg.message)}\"}}"); - firstMessage = false; + jsonBuilder.Append($"{{\"role\": \"{role}\", "); + + if (i == messages.Count - 1 && role == "user" && !string.IsNullOrEmpty(base64Image)) + { + jsonBuilder.Append("\"content\": ["); + jsonBuilder.Append($"{{\"type\": \"text\", \"text\": \"{EscapeJson(msg.message)}\"}},"); + jsonBuilder.Append($"{{\"type\": \"image_url\", \"image_url\": {{\"url\": \"data:image/png;base64,{base64Image}\"}}}}"); + jsonBuilder.Append("]"); + } + else + { + jsonBuilder.Append($"\"content\": \"{EscapeJson(msg.message)}\""); + } + + jsonBuilder.Append("}"); + if (i < messages.Count - 1) jsonBuilder.Append(","); } + jsonBuilder.Append("]}"); + + return await SendRequestAsync(endpoint, jsonBuilder.ToString(), _apiKey); + } + + private async Task GetGeminiCompletionAsync(string instruction, List<(string role, string message)> messages, int? maxTokens = null, float? temperature = null, string base64Image = null) + { + // Gemini API URL + string baseUrl = _baseUrl; + if (string.IsNullOrEmpty(baseUrl) || !baseUrl.Contains("googleapis.com")) + { + baseUrl = "https://generativelanguage.googleapis.com/v1beta"; + } + + string endpoint = $"{baseUrl}/models/{_model}:generateContent?key={_apiKey}"; - jsonBuilder.Append("]"); + StringBuilder jsonBuilder = new StringBuilder(); + jsonBuilder.Append("{"); + + if (!string.IsNullOrEmpty(instruction)) + { + jsonBuilder.Append("\"system_instruction\": {\"parts\": [{\"text\": \"" + EscapeJson(instruction) + "\"}]},"); + } + + jsonBuilder.Append("\"contents\": ["); + for (int i = 0; i < messages.Count; i++) + { + var msg = messages[i]; + string role = (msg.role ?? "user").ToLowerInvariant(); + if (role == "assistant" || role == "ai") role = "model"; + else role = "user"; + + jsonBuilder.Append($"{{\"role\": \"{role}\", \"parts\": ["); + jsonBuilder.Append($"{{\"text\": \"{EscapeJson(msg.message)}\"}}"); + + if (i == messages.Count - 1 && role == "user" && !string.IsNullOrEmpty(base64Image)) + { + jsonBuilder.Append($", {{\"inline_data\": {{\"mime_type\": \"image/png\", \"data\": \"{base64Image}\"}}}}"); + } + + jsonBuilder.Append("]}"); + if (i < messages.Count - 1) jsonBuilder.Append(","); + } + jsonBuilder.Append("],"); + + jsonBuilder.Append("\"generationConfig\": {"); + if (temperature.HasValue) jsonBuilder.Append($"\"temperature\": {temperature.Value.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)},"); + if (maxTokens.HasValue) jsonBuilder.Append($"\"maxOutputTokens\": {maxTokens.Value}"); + else jsonBuilder.Append("\"maxOutputTokens\": 2048"); jsonBuilder.Append("}"); - string jsonBody = jsonBuilder.ToString(); + jsonBuilder.Append("}"); + + return await SendRequestAsync(endpoint, jsonBuilder.ToString(), null); + } + + private async Task SendRequestAsync(string endpoint, string jsonBody, string apiKey) + { if (Prefs.DevMode) { - WulaLog.Debug($"[WulaAI] Sending request to {endpoint} (model={_model}, messages={messages?.Count ?? 0})"); - WulaLog.Debug($"[WulaAI] Request body (truncated):\n{TruncateForLog(jsonBody)}"); + WulaLog.Debug($"[WulaAI] Sending request to {endpoint}"); } using (UnityWebRequest request = new UnityWebRequest(endpoint, "POST")) @@ -90,150 +150,75 @@ namespace WulaFallenEmpire.EventSystem.AI request.uploadHandler = new UploadHandlerRaw(bodyRaw); request.downloadHandler = new DownloadHandlerBuffer(); request.SetRequestHeader("Content-Type", "application/json"); - request.timeout = 120; // 120 seconds timeout - if (!string.IsNullOrEmpty(_apiKey)) + if (!string.IsNullOrEmpty(apiKey)) { - request.SetRequestHeader("Authorization", $"Bearer {_apiKey}"); + request.SetRequestHeader("Authorization", $"Bearer {apiKey}"); } + request.timeout = 60; var operation = request.SendWebRequest(); + while (!operation.isDone) await Task.Delay(50); - while (!operation.isDone) + if (request.result != UnityWebRequest.Result.Success) { - await Task.Delay(50); - } - - if (request.result == UnityWebRequest.Result.ConnectionError || request.result == UnityWebRequest.Result.ProtocolError) - { - WulaLog.Debug($"[WulaAI] API Error: {request.error}\nResponse (truncated): {TruncateForLog(request.downloadHandler.text)}"); + string errText = request.downloadHandler.text; + WulaLog.Debug($"[WulaAI] API Error ({request.responseCode}): {request.error}\nResponse: {TruncateForLog(errText)}"); return null; } - string responseText = request.downloadHandler.text; - if (Prefs.DevMode) - { - WulaLog.Debug($"[WulaAI] Raw Response (truncated): {TruncateForLog(responseText)}"); - } - return ExtractContent(responseText); + string response = request.downloadHandler.text; + return ExtractContent(response); } } - private static string TruncateForLog(string s) - { - if (string.IsNullOrEmpty(s)) return s; - if (s.Length <= MaxLogChars) return s; - return s.Substring(0, MaxLogChars) + $"... (truncated, total {s.Length} chars)"; - } - - private string EscapeJson(string s) - { - if (s == null) return ""; - - StringBuilder sb = new StringBuilder(s.Length + 16); - for (int i = 0; i < s.Length; i++) - { - char c = s[i]; - switch (c) - { - case '\\': sb.Append("\\\\"); break; - case '"': sb.Append("\\\""); break; - case '\n': sb.Append("\\n"); break; - case '\r': sb.Append("\\r"); break; - case '\t': sb.Append("\\t"); break; - case '\b': sb.Append("\\b"); break; - case '\f': sb.Append("\\f"); break; - default: - if (c < 0x20) - { - sb.Append("\\u"); - sb.Append(((int)c).ToString("x4")); - } - else - { - sb.Append(c); - } - break; - } - } - return sb.ToString(); - } - private string ExtractContent(string json) { + if (string.IsNullOrWhiteSpace(json)) return null; + try { - // Check for stream format (SSE) - // SSE lines start with "data: " - if (json.TrimStart().StartsWith("data:")) + // 1. Gemini format + if (json.Contains("\"candidates\"")) { - StringBuilder fullContent = new StringBuilder(); - string[] lines = json.Split(new[] { "\n", "\r" }, StringSplitOptions.RemoveEmptyEntries); - foreach (string line in lines) + int partsIndex = json.IndexOf("\"parts\"", StringComparison.Ordinal); + if (partsIndex != -1) return ExtractJsonValue(json, "text", partsIndex); + } + + // 2. OpenAI format + if (json.Contains("\"choices\"")) + { + int choicesIndex = json.IndexOf("\"choices\"", StringComparison.Ordinal); + string firstChoice = TryExtractFirstChoiceObject(json, choicesIndex); + if (!string.IsNullOrEmpty(firstChoice)) { - string trimmedLine = line.Trim(); - if (trimmedLine == "data: [DONE]") continue; - if (trimmedLine.StartsWith("data: ")) - { - string dataJson = trimmedLine.Substring(6); - // Extract content from this chunk - string chunkContent = TryExtractAssistantContent(dataJson) ?? ExtractJsonValue(dataJson, "content"); - if (!string.IsNullOrEmpty(chunkContent)) - { - fullContent.Append(chunkContent); - } - } + int messageIndex = firstChoice.IndexOf("\"message\"", StringComparison.Ordinal); + if (messageIndex != -1) return ExtractJsonValue(firstChoice, "content", messageIndex); + + int deltaIndex = firstChoice.IndexOf("\"delta\"", StringComparison.Ordinal); + if (deltaIndex != -1) return ExtractJsonValue(firstChoice, "content", deltaIndex); + + return ExtractJsonValue(firstChoice, "text", 0); } - return fullContent.ToString(); - } - else - { - // Standard non-stream format - return TryExtractAssistantContent(json) ?? ExtractJsonValue(json, "content"); } + + // 3. Last fallback + return ExtractJsonValue(json, "content"); } catch (Exception ex) { - WulaLog.Debug($"[WulaAI] Error parsing response: {ex}"); + WulaLog.Debug($"[WulaAI] Error parsing response: {ex.Message}"); } return null; } - private static string TryExtractAssistantContent(string json) - { - if (string.IsNullOrWhiteSpace(json)) return null; - - int choicesIndex = json.IndexOf("\"choices\"", StringComparison.Ordinal); - if (choicesIndex == -1) return null; - - string firstChoiceJson = TryExtractFirstChoiceObject(json, choicesIndex); - if (string.IsNullOrEmpty(firstChoiceJson)) return null; - - int messageIndex = firstChoiceJson.IndexOf("\"message\"", StringComparison.Ordinal); - if (messageIndex != -1) - { - return ExtractJsonValue(firstChoiceJson, "content", messageIndex); - } - - int deltaIndex = firstChoiceJson.IndexOf("\"delta\"", StringComparison.Ordinal); - if (deltaIndex != -1) - { - return ExtractJsonValue(firstChoiceJson, "content", deltaIndex); - } - - return ExtractJsonValue(firstChoiceJson, "text", 0); - } - private static string TryExtractFirstChoiceObject(string json, int choicesKeyIndex) { int arrayStart = json.IndexOf('[', choicesKeyIndex); if (arrayStart == -1) return null; - int objStart = json.IndexOf('{', arrayStart); if (objStart == -1) return null; - int objEnd = FindMatchingBrace(json, objStart); if (objEnd == -1) return null; - return json.Substring(objStart, objEnd - objStart + 1); } @@ -242,76 +227,35 @@ namespace WulaFallenEmpire.EventSystem.AI int depth = 0; bool inString = false; bool escaped = false; - for (int i = startIndex; i < json.Length; i++) { char c = json[i]; if (inString) { - if (escaped) - { - escaped = false; - continue; - } - - if (c == '\\') - { - escaped = true; - continue; - } - - if (c == '"') - { - inString = false; - } - + if (escaped) { escaped = false; continue; } + if (c == '\\') { escaped = true; continue; } + if (c == '"') inString = false; continue; } - - if (c == '"') - { - inString = true; - continue; - } - - if (c == '{') - { - depth++; - continue; - } - - if (c == '}') - { - depth--; - if (depth == 0) return i; - } + if (c == '"') { inString = true; continue; } + if (c == '{') depth++; + if (c == '}') { depth--; if (depth == 0) return i; } } - return -1; } - private static string ExtractJsonValue(string json, string key) - { - // Simple parser to find "key": "value" - // This is not a full JSON parser and assumes standard formatting - return ExtractJsonValue(json, key, 0); - } - - private static string ExtractJsonValue(string json, string key, int startIndex) + private static string ExtractJsonValue(string json, string key, int startIndex = 0) { string keyPattern = $"\"{key}\""; int keyIndex = json.IndexOf(keyPattern, startIndex, StringComparison.Ordinal); if (keyIndex == -1) return null; - // Find the colon after the key int colonIndex = json.IndexOf(':', keyIndex + keyPattern.Length); if (colonIndex == -1) return null; - // Find the opening quote of the value int valueStart = json.IndexOf('"', colonIndex); if (valueStart == -1) return null; - // Extract string with escape handling StringBuilder sb = new StringBuilder(); bool escaped = false; for (int i = valueStart + 1; i < json.Length; i++) @@ -324,113 +268,47 @@ namespace WulaFallenEmpire.EventSystem.AI else if (c == 't') sb.Append('\t'); else if (c == '"') sb.Append('"'); else if (c == '\\') sb.Append('\\'); - else sb.Append(c); // Literal + else sb.Append(c); escaped = false; } else { - if (c == '\\') - { - escaped = true; - } - else if (c == '"') - { - // End of string - return sb.ToString(); - } - else - { - sb.Append(c); - } + if (c == '\\') escaped = true; + else if (c == '"') return sb.ToString(); + else sb.Append(c); } } return null; } - /// - /// 发送带图片的 VLM 视觉请求 - /// - public async Task GetVisionCompletionAsync( - string systemPrompt, - string userText, - string base64Image, - int maxTokens = 512, - float temperature = 0.3f) + private string EscapeJson(string s) { - if (string.IsNullOrEmpty(_baseUrl)) + if (s == null) return ""; + StringBuilder sb = new StringBuilder(s.Length + 16); + for (int i = 0; i < s.Length; i++) { - WulaLog.Debug("[WulaAI] VLM: Base URL is missing."); - return null; - } - - string endpoint = $"{_baseUrl}/chat/completions"; - if (_baseUrl.EndsWith("/chat/completions")) endpoint = _baseUrl; - else if (!_baseUrl.EndsWith("/v1")) endpoint = $"{_baseUrl}/v1/chat/completions"; - - // Build VLM-specific JSON with image content - StringBuilder jsonBuilder = new StringBuilder(); - jsonBuilder.Append("{"); - jsonBuilder.Append($"\"model\": \"{_model}\","); - jsonBuilder.Append("\"stream\": false,"); - jsonBuilder.Append($"\"max_tokens\": {Math.Max(1, maxTokens)},"); - jsonBuilder.Append($"\"temperature\": {Mathf.Clamp(temperature, 0f, 2f).ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)},"); - jsonBuilder.Append("\"messages\": ["); - - // System message - if (!string.IsNullOrEmpty(systemPrompt)) - { - jsonBuilder.Append($"{{\"role\": \"system\", \"content\": \"{EscapeJson(systemPrompt)}\"}},"); - } - - // User message with image (multimodal content) - jsonBuilder.Append("{\"role\": \"user\", \"content\": ["); - jsonBuilder.Append($"{{\"type\": \"text\", \"text\": \"{EscapeJson(userText)}\"}},"); - jsonBuilder.Append("{\"type\": \"image_url\", \"image_url\": {"); - jsonBuilder.Append($"\"url\": \"data:image/png;base64,{base64Image}\""); - jsonBuilder.Append("}}"); - jsonBuilder.Append("]}"); - - jsonBuilder.Append("]}"); - - string jsonBody = jsonBuilder.ToString(); - if (Prefs.DevMode) - { - // Don't log the full base64 image - WulaLog.Debug($"[WulaAI] VLM request to {endpoint} (model={_model}, imageSize={base64Image?.Length ?? 0} chars)"); - } - - using (UnityWebRequest request = new UnityWebRequest(endpoint, "POST")) - { - byte[] bodyRaw = Encoding.UTF8.GetBytes(jsonBody); - request.uploadHandler = new UploadHandlerRaw(bodyRaw); - request.downloadHandler = new DownloadHandlerBuffer(); - request.SetRequestHeader("Content-Type", "application/json"); - request.timeout = 60; // VLM requests may take longer due to image processing - if (!string.IsNullOrEmpty(_apiKey)) + char c = s[i]; + switch (c) { - request.SetRequestHeader("Authorization", $"Bearer {_apiKey}"); + case '\\': sb.Append("\\\\"); break; + case '"': sb.Append("\\\""); break; + case '\n': sb.Append("\\n"); break; + case '\r': sb.Append("\\r"); break; + case '\t': sb.Append("\\t"); break; + default: + if (c < 0x20) { sb.Append("\\u"); sb.Append(((int)c).ToString("x4")); } + else sb.Append(c); + break; } - - var operation = request.SendWebRequest(); - - while (!operation.isDone) - { - await Task.Delay(100); - } - - if (request.result == UnityWebRequest.Result.ConnectionError || request.result == UnityWebRequest.Result.ProtocolError) - { - WulaLog.Debug($"[WulaAI] VLM API Error: {request.error}"); - return null; - } - - string responseText = request.downloadHandler.text; - if (Prefs.DevMode) - { - WulaLog.Debug($"[WulaAI] VLM Response (truncated): {TruncateForLog(responseText)}"); - } - return ExtractContent(responseText); } + return sb.ToString(); + } + + private static string TruncateForLog(string s) + { + if (string.IsNullOrEmpty(s)) return s; + if (s.Length <= MaxLogChars) return s; + return s.Substring(0, MaxLogChars) + "... (truncated)"; } } } diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_AnalyzeScreen.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_AnalyzeScreen.cs index 695b7887..cf481063 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_AnalyzeScreen.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_AnalyzeScreen.cs @@ -47,14 +47,14 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools return "Mod 设置未初始化。"; } - // 使用主 API 密钥(如果没有单独配置 VLM 密钥) - string vlmApiKey = !string.IsNullOrEmpty(settings.vlmApiKey) ? settings.vlmApiKey : settings.apiKey; - string vlmBaseUrl = !string.IsNullOrEmpty(settings.vlmBaseUrl) ? settings.vlmBaseUrl : "https://dashscope.aliyuncs.com/compatible-mode/v1"; - string vlmModel = !string.IsNullOrEmpty(settings.vlmModel) ? settings.vlmModel : "qwen-vl-plus"; + // 使用主 API 配置 + string vlmApiKey = settings.apiKey; + string vlmBaseUrl = settings.baseUrl; + string vlmModel = settings.model; if (string.IsNullOrEmpty(vlmApiKey)) { - return "VLM API 密钥未配置。请在 Mod 设置中配置 API 密钥。"; + return "主 API 密钥未配置。请在 Mod 设置中配置。"; } // 截取屏幕 @@ -64,15 +64,20 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools return "截屏失败,无法分析屏幕。"; } - // 调用 VLM API - var client = new SimpleAIClient(vlmApiKey, vlmBaseUrl, vlmModel); + // 调用 VLM API (使用统一的 GetChatCompletionAsync) + var client = new SimpleAIClient(vlmApiKey, vlmBaseUrl, vlmModel, settings.useGeminiProtocol); - string result = await client.GetVisionCompletionAsync( + var messages = new System.Collections.Generic.List<(string role, string message)> + { + ("user", instruction) + }; + + string result = await client.GetChatCompletionAsync( BaseVisionSystemPrompt, - instruction, - base64Image, - maxTokens: 512, // 增加 token 数以支持更复杂的分析指令响应 - temperature: 0.2f + messages, + maxTokens: 512, + temperature: 0.2f, + base64Image: base64Image ); if (string.IsNullOrEmpty(result)) diff --git a/Source/WulaFallenEmpire/WulaFallenEmpireMod.cs b/Source/WulaFallenEmpire/WulaFallenEmpireMod.cs index c7afb4e3..c7fe1c3f 100644 --- a/Source/WulaFallenEmpire/WulaFallenEmpireMod.cs +++ b/Source/WulaFallenEmpire/WulaFallenEmpireMod.cs @@ -59,6 +59,12 @@ namespace WulaFallenEmpire listingStandard.Label("Wula_AISettings_Model".Translate()); settings.model = listingStandard.TextEntry(settings.model); + listingStandard.Gap(5f); + listingStandard.Label("API 协议格式:"); + if (listingStandard.RadioButton("OpenAI / 常用兼容格式 (默认)", !settings.useGeminiProtocol)) settings.useGeminiProtocol = false; + if (listingStandard.RadioButton("Google Gemini 原生格式", settings.useGeminiProtocol)) settings.useGeminiProtocol = true; + listingStandard.Gap(5f); + listingStandard.GapLine(); listingStandard.Label("Wula_AISettings_MaxContextTokens".Translate()); listingStandard.Label("Wula_AISettings_MaxContextTokensDesc".Translate()); @@ -68,35 +74,16 @@ namespace WulaFallenEmpire listingStandard.GapLine(); listingStandard.CheckboxLabeled("Wula_EnableDebugLogs".Translate(), ref settings.enableDebugLogs, "Wula_EnableDebugLogsDesc".Translate()); - // VLM 设置部分 + // 视觉设置部分 listingStandard.GapLine(); - listingStandard.Label("VLM (视觉模型) 设置"); + listingStandard.Label("视觉与多模态设置"); - listingStandard.CheckboxLabeled("启用 VLM 视觉功能", ref settings.enableVlmFeatures, "启用后 AI 可以「看到」游戏屏幕并分析"); + listingStandard.CheckboxLabeled("启用视觉交互能力", ref settings.enableVlmFeatures, "启用后 AI 可以截取屏幕并理解游戏画面"); if (settings.enableVlmFeatures) { - listingStandard.Label("VLM API Key:"); - Rect vlmKeyRect = listingStandard.GetRect(30f); - Rect vlmPasswordRect = new Rect(vlmKeyRect.x, vlmKeyRect.y, vlmKeyRect.width - toggleWidth - 5f, vlmKeyRect.height); - Rect vlmToggleRect = new Rect(vlmKeyRect.xMax - toggleWidth, vlmKeyRect.y, toggleWidth, vlmKeyRect.height); - - if (_showVlmApiKey) - { - settings.vlmApiKey = Widgets.TextField(vlmPasswordRect, settings.vlmApiKey ?? ""); - } - else - { - settings.vlmApiKey = GUI.PasswordField(vlmPasswordRect, settings.vlmApiKey ?? "", '•'); - } - Widgets.CheckboxLabeled(vlmToggleRect, "Show", ref _showVlmApiKey); - listingStandard.Gap(listingStandard.verticalSpacing); - - listingStandard.Label("VLM Base URL:"); - settings.vlmBaseUrl = listingStandard.TextEntry(settings.vlmBaseUrl ?? "https://dashscope.aliyuncs.com/compatible-mode/v1"); - - listingStandard.Label("VLM Model:"); - settings.vlmModel = listingStandard.TextEntry(settings.vlmModel ?? "qwen-vl-max"); + listingStandard.CheckboxLabeled("优先使用原生多模态模式", ref settings.useNativeMultimodal, "直接在思考阶段将截图发送给主模型(推荐,速度更快,需模型支持视角)"); + listingStandard.CheckboxLabeled("在 UI 中显示中间思考过程", ref settings.showThinkingProcess, "显示 AI 执行工具时的状态反馈"); } listingStandard.GapLine(); diff --git a/Source/WulaFallenEmpire/WulaFallenEmpireSettings.cs b/Source/WulaFallenEmpire/WulaFallenEmpireSettings.cs index fc8c6c52..5b6f9d5a 100644 --- a/Source/WulaFallenEmpire/WulaFallenEmpireSettings.cs +++ b/Source/WulaFallenEmpire/WulaFallenEmpireSettings.cs @@ -7,28 +7,28 @@ namespace WulaFallenEmpire public string apiKey = "sk-xxxxxxxx"; public string baseUrl = "https://api.deepseek.com"; public string model = "deepseek-chat"; + public bool useGeminiProtocol = false; // 是否使用 Google Gemini 协议格式 public int maxContextTokens = 100000; public bool enableDebugLogs = false; - // VLM (视觉语言模型) 配置 - public string vlmApiKey = ""; - public string vlmBaseUrl = "https://dashscope.aliyuncs.com/compatible-mode/v1"; - public string vlmModel = "qwen-vl-plus"; + // 视觉功能配置 public bool enableVlmFeatures = false; - + public bool useNativeMultimodal = true; // 默认启用原生多模态 + public bool showThinkingProcess = true; // 是否显示中间思考过过程 + public override void ExposeData() { Scribe_Values.Look(ref apiKey, "apiKey", "sk-xxxxxxxx"); Scribe_Values.Look(ref baseUrl, "baseUrl", "https://api.deepseek.com"); Scribe_Values.Look(ref model, "model", "deepseek-chat"); + Scribe_Values.Look(ref useGeminiProtocol, "useGeminiProtocol", false); Scribe_Values.Look(ref maxContextTokens, "maxContextTokens", 100000); Scribe_Values.Look(ref enableDebugLogs, "enableDebugLogs", false); - // VLM 配置 - Scribe_Values.Look(ref vlmApiKey, "vlmApiKey", ""); - Scribe_Values.Look(ref vlmBaseUrl, "vlmBaseUrl", "https://dashscope.aliyuncs.com/compatible-mode/v1"); - Scribe_Values.Look(ref vlmModel, "vlmModel", "qwen-vl-plus"); + // 简化后的视觉配置 Scribe_Values.Look(ref enableVlmFeatures, "enableVlmFeatures", false); + Scribe_Values.Look(ref useNativeMultimodal, "useNativeMultimodal", true); + Scribe_Values.Look(ref showThinkingProcess, "showThinkingProcess", true); base.ExposeData(); }