This commit is contained in:
2025-12-27 23:09:19 +08:00
parent 8112ddbce6
commit f9f8b5f291
8 changed files with 251 additions and 349 deletions

View File

@@ -49,6 +49,7 @@ namespace WulaFallenEmpire.EventSystem.AI
private readonly HashSet<string> _actionSuccessLedgerSet = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
private readonly List<string> _actionFailedLedger = new List<string>();
private readonly HashSet<string> _actionFailedLedgerSet = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
private SimpleAIClient _currentClient;
private const int DefaultMaxHistoryTokens = 100000;
private const int CharsPerToken = 4;
@@ -483,6 +484,12 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
"If no action is required, output exactly: <no_action/>.\n" +
"Query tools exist but are disabled in this phase (not listed here).\n"
: string.Empty;
if (WulaFallenEmpireMod.settings?.enableVlmFeatures == true && WulaFallenEmpireMod.settings?.useNativeMultimodal == true)
{
phaseInstruction += "\n- NATIVE MULTIMODAL: A current screenshot of the game is attached to this request. You can see the game state directly. Use it to determine coordinates for visual tools or to understand the context.";
}
string actionWhitelist = phase == RequestPhase.ActionTools
? "ACTION PHASE VALID TAGS ONLY:\n" +
"<spawn_resources>, <send_reinforcement>, <call_bombardment>, <modify_goodwill>, <visual_click>, <visual_scroll>, <visual_type_text>, <visual_drag>, <visual_hotkey>, <visual_wait>, <visual_delete_text>, <no_action/>\n" +
@@ -844,7 +851,23 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
return;
}
var client = new SimpleAIClient(settings.apiKey, settings.baseUrl, settings.model);
var client = new SimpleAIClient(settings.apiKey, settings.baseUrl, settings.model, settings.useGeminiProtocol);
_currentClient = client;
// 只有当启用了 VLM 特性,且开启了原生多模态模式时,才截图并在请求中包含图片
string base64Image = null;
if (settings.enableVlmFeatures && settings.useNativeMultimodal)
{
base64Image = ScreenCaptureUtility.CaptureScreenAsBase64();
if (settings.showThinkingProcess)
{
AddAssistantMessage("<i>[P.I.A] 正在扫描当前战区情况...</i>");
}
}
else if (settings.showThinkingProcess)
{
AddAssistantMessage("<i>[P.I.A] 正在分析数据链路...</i>");
}
var queryPhase = RequestPhase.QueryTools;
if (Prefs.DevMode)
@@ -853,7 +876,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
}
string queryInstruction = GetToolSystemInstruction(queryPhase);
string queryResponse = await client.GetChatCompletionAsync(queryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f);
string queryResponse = await client.GetChatCompletionAsync(queryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f, base64Image: base64Image);
if (string.IsNullOrEmpty(queryResponse))
{
AddAssistantMessage("Wula_AI_Error_ConnectionLost".Translate());
@@ -896,7 +919,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
SetThinkingPhase(1, true);
string retryQueryInstruction = GetToolSystemInstruction(queryPhase) +
"\n\n# RETRY\nYou chose to retry. Output XML tool calls only (or <no_action/>).";
string retryQueryResponse = await client.GetChatCompletionAsync(retryQueryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f);
string retryQueryResponse = await client.GetChatCompletionAsync(retryQueryInstruction, BuildToolContext(queryPhase), maxTokens: 128, temperature: 0.1f, base64Image: base64Image);
if (string.IsNullOrEmpty(retryQueryResponse))
{
AddAssistantMessage("Wula_AI_Error_ConnectionLost".Translate());
@@ -916,6 +939,11 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
}
}
if (settings.showThinkingProcess)
{
AddAssistantMessage("<i>[P.I.A] 正在计算最优战术方案...</i>");
}
var actionPhase = RequestPhase.ActionTools;
if (Prefs.DevMode)
{
@@ -1096,6 +1124,11 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori
}
}
if (settings.showThinkingProcess)
{
AddAssistantMessage("<i>[P.I.A] 正在汇总战报并建立通讯记录...</i>");
}
string reply = await client.GetChatCompletionAsync(replyInstruction, BuildReplyHistory());
if (string.IsNullOrEmpty(reply))
{

View File

@@ -121,32 +121,31 @@ namespace WulaFallenEmpire.EventSystem.AI.Agent
return;
}
var client = new SimpleAIClient(settings.apiKey, settings.baseUrl, settings.model);
var client = new SimpleAIClient(settings.apiKey, settings.baseUrl, settings.model, settings.useGeminiProtocol);
// 使用 VLM 如果启用且配置了
string decision;
if (settings.enableVlmFeatures && !string.IsNullOrEmpty(settings.vlmApiKey))
string base64Image = null;
// 如果启用了视觉特性且开启了原生多模态,则在决策前截图
if (settings.enableVlmFeatures && settings.useNativeMultimodal)
{
// 使用 VLM 分析屏幕
string base64Image = ScreenCaptureUtility.CaptureScreenAsBase64();
var vlmClient = new SimpleAIClient(settings.vlmApiKey, settings.vlmBaseUrl, settings.vlmModel);
decision = await vlmClient.GetVisionCompletionAsync(
GetAgentSystemPrompt(),
prompt,
base64Image,
maxTokens: 512,
temperature: 0.3f
);
}
else
{
// 纯文本模式
var messages = new List<(string role, string message)>
base64Image = ScreenCaptureUtility.CaptureScreenAsBase64();
if (settings.showThinkingProcess)
{
("user", prompt)
};
decision = await client.GetChatCompletionAsync(GetAgentSystemPrompt(), messages, 512, 0.3f);
Messages.Message("AI Agent: 正在通过视觉传感器分析实地情况...", MessageTypeDefOf.NeutralEvent);
}
}
else if (settings.showThinkingProcess)
{
Messages.Message("AI Agent: 正在分析传感器遥测数据...", MessageTypeDefOf.NeutralEvent);
}
// 直接调用 GetChatCompletionAsync (它已支持 multimodal 参数)
var messages = new List<(string role, string message)>
{
("user", prompt)
};
decision = await client.GetChatCompletionAsync(GetAgentSystemPrompt(), messages, 512, 0.3f, base64Image: base64Image);
if (string.IsNullOrEmpty(decision))
{

View File

@@ -5,6 +5,7 @@ using System.Threading.Tasks;
using UnityEngine.Networking;
using UnityEngine;
using Verse;
using System.Linq;
namespace WulaFallenEmpire.EventSystem.AI
{
@@ -13,17 +14,25 @@ namespace WulaFallenEmpire.EventSystem.AI
private readonly string _apiKey;
private readonly string _baseUrl;
private readonly string _model;
private readonly bool _useGemini;
private const int MaxLogChars = 2000;
public SimpleAIClient(string apiKey, string baseUrl, string model)
public SimpleAIClient(string apiKey, string baseUrl, string model, bool useGemini = false)
{
_apiKey = apiKey;
_baseUrl = baseUrl?.TrimEnd('/');
_model = model;
_useGemini = useGemini;
}
public async Task<string> GetChatCompletionAsync(string instruction, List<(string role, string message)> messages, int? maxTokens = null, float? temperature = null)
public async Task<string> GetChatCompletionAsync(string instruction, List<(string role, string message)> messages, int? maxTokens = null, float? temperature = null, string base64Image = null)
{
if (_useGemini)
{
return await GetGeminiCompletionAsync(instruction, messages, maxTokens, temperature, base64Image);
}
// OpenAI / Compatible Mode
if (string.IsNullOrEmpty(_baseUrl))
{
WulaLog.Debug("[WulaAI] Base URL is missing.");
@@ -31,57 +40,108 @@ namespace WulaFallenEmpire.EventSystem.AI
}
string endpoint = $"{_baseUrl}/chat/completions";
// Handle cases where baseUrl already includes /v1 or full path
if (_baseUrl.EndsWith("/chat/completions")) endpoint = _baseUrl;
else if (!_baseUrl.EndsWith("/v1")) endpoint = $"{_baseUrl}/v1/chat/completions";
// Build JSON manually to avoid dependencies
StringBuilder jsonBuilder = new StringBuilder();
jsonBuilder.Append("{");
jsonBuilder.Append($"\"model\": \"{_model}\",");
jsonBuilder.Append("\"stream\": false,"); // We request non-stream, but handle stream if returned
if (maxTokens.HasValue)
{
jsonBuilder.Append($"\"max_tokens\": {Math.Max(1, maxTokens.Value)},");
}
if (temperature.HasValue)
{
float clamped = Mathf.Clamp(temperature.Value, 0f, 2f);
jsonBuilder.Append($"\"temperature\": {clamped.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)},");
}
jsonBuilder.Append("\"messages\": [");
jsonBuilder.Append("\"stream\": false,");
if (maxTokens.HasValue) jsonBuilder.Append($"\"max_tokens\": {Math.Max(1, maxTokens.Value)},");
if (temperature.HasValue) jsonBuilder.Append($"\"temperature\": {temperature.Value.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)},");
// System instruction
bool firstMessage = true;
jsonBuilder.Append("\"messages\": [");
if (!string.IsNullOrEmpty(instruction))
{
jsonBuilder.Append($"{{\"role\": \"system\", \"content\": \"{EscapeJson(instruction)}\"}}");
firstMessage = false;
jsonBuilder.Append($"{{\"role\": \"system\", \"content\": \"{EscapeJson(instruction)}\"}},");
}
// Messages
for (int i = 0; i < messages.Count; i++)
{
var msg = messages[i];
string role = (msg.role ?? "user").ToLowerInvariant();
if (role == "ai") role = "assistant";
else if (role == "tool") role = "system"; // Internal-only role; map to supported role for Chat Completions APIs.
if (role == "ai" || role == "assistant") role = "assistant";
else if (role == "tool") role = "system";
else if (role == "toolcall") continue;
else if (role != "system" && role != "user" && role != "assistant") role = "user";
if (!firstMessage) jsonBuilder.Append(",");
jsonBuilder.Append($"{{\"role\": \"{role}\", \"content\": \"{EscapeJson(msg.message)}\"}}");
firstMessage = false;
jsonBuilder.Append($"{{\"role\": \"{role}\", ");
if (i == messages.Count - 1 && role == "user" && !string.IsNullOrEmpty(base64Image))
{
jsonBuilder.Append("\"content\": [");
jsonBuilder.Append($"{{\"type\": \"text\", \"text\": \"{EscapeJson(msg.message)}\"}},");
jsonBuilder.Append($"{{\"type\": \"image_url\", \"image_url\": {{\"url\": \"data:image/png;base64,{base64Image}\"}}}}");
jsonBuilder.Append("]");
}
else
{
jsonBuilder.Append($"\"content\": \"{EscapeJson(msg.message)}\"");
}
jsonBuilder.Append("}");
if (i < messages.Count - 1) jsonBuilder.Append(",");
}
jsonBuilder.Append("]}");
return await SendRequestAsync(endpoint, jsonBuilder.ToString(), _apiKey);
}
/// <summary>
/// Builds a Google Gemini <c>generateContent</c> request from the conversation and sends it.
/// </summary>
/// <param name="instruction">Optional system instruction; emitted as <c>system_instruction</c> when non-empty.</param>
/// <param name="messages">Conversation turns. Roles "assistant"/"ai" are sent as "model"; every other role is sent as "user". A null list is treated as empty.</param>
/// <param name="maxTokens">Value for <c>maxOutputTokens</c>, clamped to at least 1; 2048 is used when omitted.</param>
/// <param name="temperature">Optional sampling temperature; left out of the request when null.</param>
/// <param name="base64Image">Optional base64 PNG, attached as <c>inline_data</c> to the last message only when that message ends up with the "user" role.</param>
/// <returns>The value returned by <c>SendRequestAsync</c> for this request.</returns>
private async Task<string> GetGeminiCompletionAsync(string instruction, List<(string role, string message)> messages, int? maxTokens = null, float? temperature = null, string base64Image = null)
{
    // Use the public Gemini endpoint unless a googleapis.com base URL was configured.
    string baseUrl = _baseUrl;
    if (string.IsNullOrEmpty(baseUrl) || !baseUrl.Contains("googleapis.com"))
    {
        baseUrl = "https://generativelanguage.googleapis.com/v1beta";
    }

    // The key travels in the query string, so it has to be URL-escaped.
    // NOTE(review): SendRequestAsync logs the endpoint in dev mode, which includes this key.
    string escapedKey = System.Uri.EscapeDataString(_apiKey ?? string.Empty);
    string endpoint = $"{baseUrl}/models/{_model}:generateContent?key={escapedKey}";

    StringBuilder jsonBuilder = new StringBuilder();
    jsonBuilder.Append("{");

    if (!string.IsNullOrEmpty(instruction))
    {
        jsonBuilder.Append("\"system_instruction\": {\"parts\": [{\"text\": \"" + EscapeJson(instruction) + "\"}]},");
    }

    jsonBuilder.Append("\"contents\": [");
    int messageCount = messages?.Count ?? 0;
    for (int i = 0; i < messageCount; i++)
    {
        var msg = messages[i];
        string role = (msg.role ?? "user").ToLowerInvariant();
        if (role == "assistant" || role == "ai")
        {
            role = "model";
        }
        else
        {
            role = "user";
        }

        jsonBuilder.Append($"{{\"role\": \"{role}\", \"parts\": [");
        jsonBuilder.Append($"{{\"text\": \"{EscapeJson(msg.message)}\"}}");

        // The screenshot rides along with the final user turn only.
        bool isLastMessage = i == messageCount - 1;
        if (isLastMessage && role == "user" && !string.IsNullOrEmpty(base64Image))
        {
            jsonBuilder.Append($", {{\"inline_data\": {{\"mime_type\": \"image/png\", \"data\": \"{base64Image}\"}}}}");
        }

        jsonBuilder.Append("]}");
        if (!isLastMessage)
        {
            jsonBuilder.Append(",");
        }
    }
    jsonBuilder.Append("],");

    jsonBuilder.Append("\"generationConfig\": {");
    if (temperature.HasValue)
    {
        jsonBuilder.Append($"\"temperature\": {temperature.Value.ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)},");
    }

    // Same lower bound as the OpenAI-compatible path applies to max_tokens.
    int outputTokens = maxTokens.HasValue ? Math.Max(1, maxTokens.Value) : 2048;
    jsonBuilder.Append($"\"maxOutputTokens\": {outputTokens}");
    jsonBuilder.Append("}");

    jsonBuilder.Append("}");

    // No bearer token: authentication is carried by the "key" query parameter.
    return await SendRequestAsync(endpoint, jsonBuilder.ToString(), null);
}
private async Task<string> SendRequestAsync(string endpoint, string jsonBody, string apiKey)
{
if (Prefs.DevMode)
{
WulaLog.Debug($"[WulaAI] Sending request to {endpoint} (model={_model}, messages={messages?.Count ?? 0})");
WulaLog.Debug($"[WulaAI] Request body (truncated):\n{TruncateForLog(jsonBody)}");
WulaLog.Debug($"[WulaAI] Sending request to {endpoint}");
}
using (UnityWebRequest request = new UnityWebRequest(endpoint, "POST"))
@@ -90,150 +150,75 @@ namespace WulaFallenEmpire.EventSystem.AI
request.uploadHandler = new UploadHandlerRaw(bodyRaw);
request.downloadHandler = new DownloadHandlerBuffer();
request.SetRequestHeader("Content-Type", "application/json");
request.timeout = 120; // 120 seconds timeout
if (!string.IsNullOrEmpty(_apiKey))
if (!string.IsNullOrEmpty(apiKey))
{
request.SetRequestHeader("Authorization", $"Bearer {_apiKey}");
request.SetRequestHeader("Authorization", $"Bearer {apiKey}");
}
request.timeout = 60;
var operation = request.SendWebRequest();
while (!operation.isDone) await Task.Delay(50);
while (!operation.isDone)
if (request.result != UnityWebRequest.Result.Success)
{
await Task.Delay(50);
}
if (request.result == UnityWebRequest.Result.ConnectionError || request.result == UnityWebRequest.Result.ProtocolError)
{
WulaLog.Debug($"[WulaAI] API Error: {request.error}\nResponse (truncated): {TruncateForLog(request.downloadHandler.text)}");
string errText = request.downloadHandler.text;
WulaLog.Debug($"[WulaAI] API Error ({request.responseCode}): {request.error}\nResponse: {TruncateForLog(errText)}");
return null;
}
string responseText = request.downloadHandler.text;
if (Prefs.DevMode)
{
WulaLog.Debug($"[WulaAI] Raw Response (truncated): {TruncateForLog(responseText)}");
}
return ExtractContent(responseText);
string response = request.downloadHandler.text;
return ExtractContent(response);
}
}
/// <summary>
/// Shortens a string for log output. Null, empty, or strings of at most
/// <c>MaxLogChars</c> characters are returned unchanged; longer strings are cut
/// to <c>MaxLogChars</c> characters and suffixed with a note giving the original length.
/// </summary>
private static string TruncateForLog(string s)
{
    bool fitsInLog = string.IsNullOrEmpty(s) || s.Length <= MaxLogChars;
    if (fitsInLog)
    {
        return s;
    }

    string head = s.Substring(0, MaxLogChars);
    return head + $"... (truncated, total {s.Length} chars)";
}
/// <summary>
/// Escapes a string so it can be placed between double quotes in a JSON document.
/// </summary>
/// <param name="s">Text to escape; null yields an empty string.</param>
/// <returns>
/// The text with backslash, double quote, newline, carriage return, tab, backspace and
/// form feed written as two-character escapes, and any other character below U+0020
/// written as a lowercase four-digit <c>\u</c> escape.
/// </returns>
private string EscapeJson(string s)
{
    if (s == null)
    {
        return "";
    }

    StringBuilder escaped = new StringBuilder(s.Length + 16);
    foreach (char ch in s)
    {
        // Characters with a dedicated short escape form.
        string shortForm = null;
        if (ch == '\\')
        {
            shortForm = "\\\\";
        }
        else if (ch == '"')
        {
            shortForm = "\\\"";
        }
        else if (ch == '\n')
        {
            shortForm = "\\n";
        }
        else if (ch == '\r')
        {
            shortForm = "\\r";
        }
        else if (ch == '\t')
        {
            shortForm = "\\t";
        }
        else if (ch == '\b')
        {
            shortForm = "\\b";
        }
        else if (ch == '\f')
        {
            shortForm = "\\f";
        }

        if (shortForm != null)
        {
            escaped.Append(shortForm);
        }
        else if (ch < 0x20)
        {
            // Remaining control characters use the generic \uXXXX form.
            escaped.Append("\\u");
            escaped.Append(((int)ch).ToString("x4"));
        }
        else
        {
            escaped.Append(ch);
        }
    }

    return escaped.ToString();
}
/// <summary>
/// Pulls the assistant's reply text out of a raw API response body.
/// </summary>
/// <param name="json">Raw response text; null or whitespace yields null.</param>
/// <returns>
/// The extracted text, or null when nothing could be extracted or parsing threw.
/// For a streamed body the concatenation of all chunk texts is returned (possibly empty).
/// </returns>
/// <remarks>
/// Non-streaming is requested, but a body made of server-sent-event lines ("data: ...")
/// is still handled by concatenating the text of every chunk instead of only the first.
/// </remarks>
private string ExtractContent(string json)
{
    if (string.IsNullOrWhiteSpace(json))
    {
        return null;
    }

    try
    {
        // Streamed (SSE) body: every payload line starts with "data:".
        if (json.TrimStart().StartsWith("data:", StringComparison.Ordinal))
        {
            StringBuilder fullContent = new StringBuilder();
            string[] lines = json.Split(new[] { "\n", "\r" }, StringSplitOptions.RemoveEmptyEntries);
            foreach (string line in lines)
            {
                string trimmedLine = line.Trim();
                if (!trimmedLine.StartsWith("data:", StringComparison.Ordinal))
                {
                    continue;
                }

                string dataJson = trimmedLine.Substring(5).Trim();
                if (dataJson.Length == 0 || dataJson == "[DONE]")
                {
                    // "[DONE]" is the end-of-stream marker, not a JSON chunk.
                    continue;
                }

                string chunkContent = ExtractContentFromObject(dataJson);
                if (!string.IsNullOrEmpty(chunkContent))
                {
                    fullContent.Append(chunkContent);
                }
            }

            return fullContent.ToString();
        }

        return ExtractContentFromObject(json);
    }
    catch (Exception ex)
    {
        WulaLog.Debug($"[WulaAI] Error parsing response: {ex.Message}");
    }

    return null;
}

/// <summary>
/// Extracts the reply text from one JSON response object (a whole response or one stream chunk).
/// Tries the Gemini shape ("candidates" / "parts" / "text"), then the OpenAI shape
/// (first "choices" entry: "message" or "delta" content, else "text"), then the first "content" value.
/// </summary>
private static string ExtractContentFromObject(string json)
{
    // 1. Gemini format
    if (json.Contains("\"candidates\""))
    {
        int partsIndex = json.IndexOf("\"parts\"", StringComparison.Ordinal);
        if (partsIndex != -1)
        {
            return ExtractJsonValue(json, "text", partsIndex);
        }
    }

    // 2. OpenAI format
    if (json.Contains("\"choices\""))
    {
        int choicesIndex = json.IndexOf("\"choices\"", StringComparison.Ordinal);
        string firstChoice = TryExtractFirstChoiceObject(json, choicesIndex);
        if (!string.IsNullOrEmpty(firstChoice))
        {
            int messageIndex = firstChoice.IndexOf("\"message\"", StringComparison.Ordinal);
            if (messageIndex != -1)
            {
                return ExtractJsonValue(firstChoice, "content", messageIndex);
            }

            int deltaIndex = firstChoice.IndexOf("\"delta\"", StringComparison.Ordinal);
            if (deltaIndex != -1)
            {
                return ExtractJsonValue(firstChoice, "content", deltaIndex);
            }

            return ExtractJsonValue(firstChoice, "text", 0);
        }
    }

    // 3. Last fallback
    return ExtractJsonValue(json, "content", 0);
}
/// <summary>
/// Reads the assistant text from the first entry of an OpenAI-style "choices" array.
/// </summary>
/// <param name="json">Response JSON text.</param>
/// <returns>
/// The "content" value found after "message" (preferred) or "delta" inside the first choice,
/// otherwise that choice's "text" value; null when the input is blank, has no "choices" key,
/// or no first choice object could be isolated.
/// </returns>
private static string TryExtractAssistantContent(string json)
{
    if (string.IsNullOrWhiteSpace(json))
    {
        return null;
    }

    int choicesPos = json.IndexOf("\"choices\"", StringComparison.Ordinal);
    string choice = choicesPos >= 0 ? TryExtractFirstChoiceObject(json, choicesPos) : null;
    if (string.IsNullOrEmpty(choice))
    {
        return null;
    }

    // "message" is checked before "delta"; the first container present wins.
    foreach (string container in new[] { "\"message\"", "\"delta\"" })
    {
        int containerPos = choice.IndexOf(container, StringComparison.Ordinal);
        if (containerPos != -1)
        {
            return ExtractJsonValue(choice, "content", containerPos);
        }
    }

    return ExtractJsonValue(choice, "text", 0);
}
/// <summary>
/// Returns the text of the first JSON object inside the array that follows
/// <paramref name="choicesKeyIndex"/>: from the first '{' after the first '['
/// through the closing brace reported by <c>FindMatchingBrace</c>.
/// </summary>
/// <returns>The object text, or null when the '[', the '{' or the matching brace is not found.</returns>
private static string TryExtractFirstChoiceObject(string json, int choicesKeyIndex)
{
    int bracketPos = json.IndexOf('[', choicesKeyIndex);
    int openBrace = bracketPos == -1 ? -1 : json.IndexOf('{', bracketPos);
    if (openBrace == -1)
    {
        return null;
    }

    int closeBrace = FindMatchingBrace(json, openBrace);
    return closeBrace == -1
        ? null
        : json.Substring(openBrace, closeBrace - openBrace + 1);
}
@@ -242,76 +227,35 @@ namespace WulaFallenEmpire.EventSystem.AI
int depth = 0;
bool inString = false;
bool escaped = false;
for (int i = startIndex; i < json.Length; i++)
{
char c = json[i];
if (inString)
{
if (escaped)
{
escaped = false;
continue;
}
if (c == '\\')
{
escaped = true;
continue;
}
if (c == '"')
{
inString = false;
}
if (escaped) { escaped = false; continue; }
if (c == '\\') { escaped = true; continue; }
if (c == '"') inString = false;
continue;
}
if (c == '"')
{
inString = true;
continue;
}
if (c == '{')
{
depth++;
continue;
}
if (c == '}')
{
depth--;
if (depth == 0) return i;
}
if (c == '"') { inString = true; continue; }
if (c == '{') depth++;
if (c == '}') { depth--; if (depth == 0) return i; }
}
return -1;
}
/// <summary>
/// Convenience overload: searches for <paramref name="key"/> from the start of
/// <paramref name="json"/> by delegating to the index-based overload with a start index of 0.
/// </summary>
/// <remarks>This is a simple "key": "value" lookup, not a full JSON parser; it assumes standard formatting.</remarks>
private static string ExtractJsonValue(string json, string key) => ExtractJsonValue(json, key, 0);
private static string ExtractJsonValue(string json, string key, int startIndex)
private static string ExtractJsonValue(string json, string key, int startIndex = 0)
{
string keyPattern = $"\"{key}\"";
int keyIndex = json.IndexOf(keyPattern, startIndex, StringComparison.Ordinal);
if (keyIndex == -1) return null;
// Find the colon after the key
int colonIndex = json.IndexOf(':', keyIndex + keyPattern.Length);
if (colonIndex == -1) return null;
// Find the opening quote of the value
int valueStart = json.IndexOf('"', colonIndex);
if (valueStart == -1) return null;
// Extract string with escape handling
StringBuilder sb = new StringBuilder();
bool escaped = false;
for (int i = valueStart + 1; i < json.Length; i++)
@@ -324,113 +268,47 @@ namespace WulaFallenEmpire.EventSystem.AI
else if (c == 't') sb.Append('\t');
else if (c == '"') sb.Append('"');
else if (c == '\\') sb.Append('\\');
else sb.Append(c); // Literal
else sb.Append(c);
escaped = false;
}
else
{
if (c == '\\')
{
escaped = true;
}
else if (c == '"')
{
// End of string
return sb.ToString();
}
else
{
sb.Append(c);
}
if (c == '\\') escaped = true;
else if (c == '"') return sb.ToString();
else sb.Append(c);
}
}
return null;
}
/// <summary>
/// 发送带图片的 VLM 视觉请求
/// </summary>
public async Task<string> GetVisionCompletionAsync(
string systemPrompt,
string userText,
string base64Image,
int maxTokens = 512,
float temperature = 0.3f)
private string EscapeJson(string s)
{
if (string.IsNullOrEmpty(_baseUrl))
if (s == null) return "";
StringBuilder sb = new StringBuilder(s.Length + 16);
for (int i = 0; i < s.Length; i++)
{
WulaLog.Debug("[WulaAI] VLM: Base URL is missing.");
return null;
}
string endpoint = $"{_baseUrl}/chat/completions";
if (_baseUrl.EndsWith("/chat/completions")) endpoint = _baseUrl;
else if (!_baseUrl.EndsWith("/v1")) endpoint = $"{_baseUrl}/v1/chat/completions";
// Build VLM-specific JSON with image content
StringBuilder jsonBuilder = new StringBuilder();
jsonBuilder.Append("{");
jsonBuilder.Append($"\"model\": \"{_model}\",");
jsonBuilder.Append("\"stream\": false,");
jsonBuilder.Append($"\"max_tokens\": {Math.Max(1, maxTokens)},");
jsonBuilder.Append($"\"temperature\": {Mathf.Clamp(temperature, 0f, 2f).ToString("0.###", System.Globalization.CultureInfo.InvariantCulture)},");
jsonBuilder.Append("\"messages\": [");
// System message
if (!string.IsNullOrEmpty(systemPrompt))
{
jsonBuilder.Append($"{{\"role\": \"system\", \"content\": \"{EscapeJson(systemPrompt)}\"}},");
}
// User message with image (multimodal content)
jsonBuilder.Append("{\"role\": \"user\", \"content\": [");
jsonBuilder.Append($"{{\"type\": \"text\", \"text\": \"{EscapeJson(userText)}\"}},");
jsonBuilder.Append("{\"type\": \"image_url\", \"image_url\": {");
jsonBuilder.Append($"\"url\": \"data:image/png;base64,{base64Image}\"");
jsonBuilder.Append("}}");
jsonBuilder.Append("]}");
jsonBuilder.Append("]}");
string jsonBody = jsonBuilder.ToString();
if (Prefs.DevMode)
{
// Don't log the full base64 image
WulaLog.Debug($"[WulaAI] VLM request to {endpoint} (model={_model}, imageSize={base64Image?.Length ?? 0} chars)");
}
using (UnityWebRequest request = new UnityWebRequest(endpoint, "POST"))
{
byte[] bodyRaw = Encoding.UTF8.GetBytes(jsonBody);
request.uploadHandler = new UploadHandlerRaw(bodyRaw);
request.downloadHandler = new DownloadHandlerBuffer();
request.SetRequestHeader("Content-Type", "application/json");
request.timeout = 60; // VLM requests may take longer due to image processing
if (!string.IsNullOrEmpty(_apiKey))
char c = s[i];
switch (c)
{
request.SetRequestHeader("Authorization", $"Bearer {_apiKey}");
case '\\': sb.Append("\\\\"); break;
case '"': sb.Append("\\\""); break;
case '\n': sb.Append("\\n"); break;
case '\r': sb.Append("\\r"); break;
case '\t': sb.Append("\\t"); break;
default:
if (c < 0x20) { sb.Append("\\u"); sb.Append(((int)c).ToString("x4")); }
else sb.Append(c);
break;
}
var operation = request.SendWebRequest();
while (!operation.isDone)
{
await Task.Delay(100);
}
if (request.result == UnityWebRequest.Result.ConnectionError || request.result == UnityWebRequest.Result.ProtocolError)
{
WulaLog.Debug($"[WulaAI] VLM API Error: {request.error}");
return null;
}
string responseText = request.downloadHandler.text;
if (Prefs.DevMode)
{
WulaLog.Debug($"[WulaAI] VLM Response (truncated): {TruncateForLog(responseText)}");
}
return ExtractContent(responseText);
}
return sb.ToString();
}
/// <summary>
/// Shortens a string for log output: anything longer than <c>MaxLogChars</c> characters
/// is cut to that length and suffixed with "... (truncated)"; null, empty and shorter
/// strings are returned unchanged.
/// </summary>
private static string TruncateForLog(string s)
{
    if (!string.IsNullOrEmpty(s) && s.Length > MaxLogChars)
    {
        return string.Concat(s.Substring(0, MaxLogChars), "... (truncated)");
    }

    return s;
}
}
}

View File

@@ -47,14 +47,14 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
return "Mod 设置未初始化。";
}
// 使用主 API 密钥(如果没有单独配置 VLM 密钥)
string vlmApiKey = !string.IsNullOrEmpty(settings.vlmApiKey) ? settings.vlmApiKey : settings.apiKey;
string vlmBaseUrl = !string.IsNullOrEmpty(settings.vlmBaseUrl) ? settings.vlmBaseUrl : "https://dashscope.aliyuncs.com/compatible-mode/v1";
string vlmModel = !string.IsNullOrEmpty(settings.vlmModel) ? settings.vlmModel : "qwen-vl-plus";
// 使用主 API 配置
string vlmApiKey = settings.apiKey;
string vlmBaseUrl = settings.baseUrl;
string vlmModel = settings.model;
if (string.IsNullOrEmpty(vlmApiKey))
{
return "VLM API 密钥未配置。请在 Mod 设置中配置 API 密钥。";
return " API 密钥未配置。请在 Mod 设置中配置。";
}
// 截取屏幕
@@ -64,15 +64,20 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools
return "截屏失败,无法分析屏幕。";
}
// 调用 VLM API
var client = new SimpleAIClient(vlmApiKey, vlmBaseUrl, vlmModel);
// 调用 VLM API (使用统一的 GetChatCompletionAsync)
var client = new SimpleAIClient(vlmApiKey, vlmBaseUrl, vlmModel, settings.useGeminiProtocol);
string result = await client.GetVisionCompletionAsync(
var messages = new System.Collections.Generic.List<(string role, string message)>
{
("user", instruction)
};
string result = await client.GetChatCompletionAsync(
BaseVisionSystemPrompt,
instruction,
base64Image,
maxTokens: 512, // 增加 token 数以支持更复杂的分析指令响应
temperature: 0.2f
messages,
maxTokens: 512,
temperature: 0.2f,
base64Image: base64Image
);
if (string.IsNullOrEmpty(result))

View File

@@ -59,6 +59,12 @@ namespace WulaFallenEmpire
listingStandard.Label("Wula_AISettings_Model".Translate());
settings.model = listingStandard.TextEntry(settings.model);
listingStandard.Gap(5f);
listingStandard.Label("<color=orange>API 协议格式:</color>");
if (listingStandard.RadioButton("OpenAI / 常用兼容格式 (默认)", !settings.useGeminiProtocol)) settings.useGeminiProtocol = false;
if (listingStandard.RadioButton("Google Gemini 原生格式", settings.useGeminiProtocol)) settings.useGeminiProtocol = true;
listingStandard.Gap(5f);
listingStandard.GapLine();
listingStandard.Label("Wula_AISettings_MaxContextTokens".Translate());
listingStandard.Label("Wula_AISettings_MaxContextTokensDesc".Translate());
@@ -68,35 +74,16 @@ namespace WulaFallenEmpire
listingStandard.GapLine();
listingStandard.CheckboxLabeled("Wula_EnableDebugLogs".Translate(), ref settings.enableDebugLogs, "Wula_EnableDebugLogsDesc".Translate());
// VLM 设置部分
// 视觉设置部分
listingStandard.GapLine();
listingStandard.Label("<color=cyan>VLM (视觉模型) 设置</color>");
listingStandard.Label("<color=cyan>视觉与多模态设置</color>");
listingStandard.CheckboxLabeled("启用 VLM 视觉功能", ref settings.enableVlmFeatures, "启用后 AI 可以「看到」游戏屏幕并分析");
listingStandard.CheckboxLabeled("启用视觉交互能力", ref settings.enableVlmFeatures, "启用后 AI 可以截取屏幕并理解游戏画面");
if (settings.enableVlmFeatures)
{
listingStandard.Label("VLM API Key:");
Rect vlmKeyRect = listingStandard.GetRect(30f);
Rect vlmPasswordRect = new Rect(vlmKeyRect.x, vlmKeyRect.y, vlmKeyRect.width - toggleWidth - 5f, vlmKeyRect.height);
Rect vlmToggleRect = new Rect(vlmKeyRect.xMax - toggleWidth, vlmKeyRect.y, toggleWidth, vlmKeyRect.height);
if (_showVlmApiKey)
{
settings.vlmApiKey = Widgets.TextField(vlmPasswordRect, settings.vlmApiKey ?? "");
}
else
{
settings.vlmApiKey = GUI.PasswordField(vlmPasswordRect, settings.vlmApiKey ?? "", '•');
}
Widgets.CheckboxLabeled(vlmToggleRect, "Show", ref _showVlmApiKey);
listingStandard.Gap(listingStandard.verticalSpacing);
listingStandard.Label("VLM Base URL:");
settings.vlmBaseUrl = listingStandard.TextEntry(settings.vlmBaseUrl ?? "https://dashscope.aliyuncs.com/compatible-mode/v1");
listingStandard.Label("VLM Model:");
settings.vlmModel = listingStandard.TextEntry(settings.vlmModel ?? "qwen-vl-max");
listingStandard.CheckboxLabeled("优先使用原生多模态模式", ref settings.useNativeMultimodal, "直接在思考阶段将截图发送给主模型(推荐,速度更快,需模型支持视角)");
listingStandard.CheckboxLabeled("在 UI 中显示中间思考过程", ref settings.showThinkingProcess, "显示 AI 执行工具时的状态反馈");
}
listingStandard.GapLine();

View File

@@ -7,28 +7,28 @@ namespace WulaFallenEmpire
// Main API connection settings (key, base URL, model name).
public string apiKey = "sk-xxxxxxxx";
public string baseUrl = "https://api.deepseek.com";
public string model = "deepseek-chat";
public bool useGeminiProtocol = false; // Whether to use the Google Gemini protocol format
public int maxContextTokens = 100000;
public bool enableDebugLogs = false;
// VLM (vision-language model) configuration
public string vlmApiKey = "";
public string vlmBaseUrl = "https://dashscope.aliyuncs.com/compatible-mode/v1";
public string vlmModel = "qwen-vl-plus";
// Vision feature configuration
public bool enableVlmFeatures = false;
public bool useNativeMultimodal = true; // Native multimodal mode is enabled by default
public bool showThinkingProcess = true; // Whether to show the intermediate thinking process
/// <summary>
/// Passes each setting field by reference to <c>Scribe_Values.Look</c> together with
/// its storage key and default value, then invokes the base implementation.
/// </summary>
public override void ExposeData()
{
Scribe_Values.Look(ref apiKey, "apiKey", "sk-xxxxxxxx");
Scribe_Values.Look(ref baseUrl, "baseUrl", "https://api.deepseek.com");
Scribe_Values.Look(ref model, "model", "deepseek-chat");
Scribe_Values.Look(ref useGeminiProtocol, "useGeminiProtocol", false);
Scribe_Values.Look(ref maxContextTokens, "maxContextTokens", 100000);
Scribe_Values.Look(ref enableDebugLogs, "enableDebugLogs", false);
// VLM configuration
Scribe_Values.Look(ref vlmApiKey, "vlmApiKey", "");
Scribe_Values.Look(ref vlmBaseUrl, "vlmBaseUrl", "https://dashscope.aliyuncs.com/compatible-mode/v1");
Scribe_Values.Look(ref vlmModel, "vlmModel", "qwen-vl-plus");
// Simplified vision configuration
Scribe_Values.Look(ref enableVlmFeatures, "enableVlmFeatures", false);
Scribe_Values.Look(ref useNativeMultimodal, "useNativeMultimodal", true);
Scribe_Values.Look(ref showThinkingProcess, "showThinkingProcess", true);
base.ExposeData();
}