using System;
using System.Collections.Generic;
using System.Threading.Tasks;
namespace WulaFallenEmpire.EventSystem.AI.Tools
{
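/// <summary>
/// VLM (vision-language model) visual analysis tool: captures the current game
/// screen and asks the configured model to analyze it per the caller's instruction.
/// </summary>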
public class Tool_AnalyzeScreen : AITool
{
    // Instruction used when the caller supplies neither "instruction" nor "context",
    // or supplies one that is blank.
    private const string DefaultInstruction = "Describe the current screen, focusing on UI state and key entities.";

    // System prompt sent with every vision request.
    private const string BaseVisionSystemPrompt = "You are a seasoned RimWorld assistant. Analyze the screenshot per instruction. Keep replies concise. Do not output tool call JSON unless explicitly asked.";

    /// <summary>Tool identifier.</summary>
    public override string Name => "analyze_screen";

    /// <summary>Human-readable description of what the tool does.</summary>
    public override string Description =>
        "Analyze the current game screen screenshot. Provide an instruction to guide the analysis.";

    /// <summary>Example JSON argument payload accepted by <see cref="ExecuteAsync"/>.</summary>
    public override string UsageSchema => "{\"instruction\":\"Describe the current screen\"}";

    /// <summary>
    /// Entry point for the tool. Delegates to <see cref="ExecuteInternalAsync"/> and
    /// converts any exception that escapes it into an error string.
    /// </summary>
    /// <param name="args">JSON argument payload; see <see cref="UsageSchema"/>.</param>
    /// <returns>
    /// The analysis result, or a message describing why the analysis could not be performed.
    /// </returns>
    public override async Task<string> ExecuteAsync(string args)
    {
        try
        {
            return await ExecuteInternalAsync(args);
        }
        catch (Exception ex)
        {
            // Covers failures raised before the inner try block (argument parsing).
            WulaLog.Debug($"[Tool_AnalyzeScreen] Execute error: {ex}");
            return $"Vision analysis error: {ex.Message}";
        }
    }

    /// <summary>
    /// Resolves the instruction, validates settings, captures the screen and sends the
    /// screenshot plus instruction to the configured model.
    /// </summary>
    /// <param name="jsonContent">JSON argument payload.</param>
    /// <returns>The prefixed model response, or a diagnostic message on failure.</returns>
    private async Task<string> ExecuteInternalAsync(string jsonContent)
    {
        var argsDict = ParseJsonArgs(jsonContent);

        // Prefer "instruction"; accept "context" as an alternative key; otherwise use the default.
        string instruction;
        if (TryGetString(argsDict, "instruction", out var explicitInstruction))
        {
            instruction = explicitInstruction;
        }
        else if (TryGetString(argsDict, "context", out var contextInstruction))
        {
            instruction = contextInstruction;
        }
        else
        {
            instruction = DefaultInstruction;
        }

        // A present-but-blank value would send an empty user message to the model.
        if (string.IsNullOrWhiteSpace(instruction))
        {
            instruction = DefaultInstruction;
        }

        try
        {
            var settings = WulaFallenEmpireMod.settings;
            if (settings == null)
            {
                return "Mod settings not initialized.";
            }

            // Pick the credential/endpoint/model triple matching the selected protocol.
            bool useGemini = settings.useGeminiProtocol;
            string vlmApiKey = useGemini ? settings.geminiApiKey : settings.apiKey;
            string vlmBaseUrl = useGemini ? settings.geminiBaseUrl : settings.baseUrl;
            string vlmModel = useGemini ? settings.geminiModel : settings.model;

            if (string.IsNullOrEmpty(vlmApiKey))
            {
                return "API key not configured. Please configure it in Mod settings.";
            }

            // The capture happens before the first await, i.e. synchronously on the caller's thread.
            string base64Image = ScreenCaptureUtility.CaptureScreenAsBase64();
            if (string.IsNullOrEmpty(base64Image))
            {
                return "Screenshot capture failed; cannot analyze screen.";
            }

            var client = new SimpleAIClient(vlmApiKey, vlmBaseUrl, vlmModel, useGemini);
            var messages = new List<(string role, string message)>
            {
                ("user", instruction)
            };

            string result = await client.GetChatCompletionAsync(
                BaseVisionSystemPrompt,
                messages,
                maxTokens: 512,
                temperature: 0.2f,
                base64Image: base64Image
            );

            if (string.IsNullOrEmpty(result))
            {
                return "Vision analysis produced no response. Check API settings.";
            }

            return $"Screen analysis result: {result.Trim()}";
        }
        catch (Exception ex)
        {
            WulaLog.Debug($"[Tool_AnalyzeScreen] Error: {ex}");
            return $"Vision analysis error: {ex.Message}";
        }
    }
}
}