From b906a468b611ca20b27e5cc0629c83b9cbd2158b Mon Sep 17 00:00:00 2001 From: "ProjectKoi-Kalo\\Kalo" Date: Wed, 31 Dec 2025 01:45:38 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B7=B2=E6=8A=8A=E5=B7=A5=E5=85=B7=E8=B0=83?= =?UTF-8?q?=E7=94=A8=E4=BB=8E=20XML=20=E6=94=B9=E6=88=90=20OpenAI=20?= =?UTF-8?q?=E5=85=BC=E5=AE=B9=20JSON=EF=BC=8C=E5=B9=B6=E7=BB=9F=E4=B8=80?= =?UTF-8?q?=E8=A7=A3=E6=9E=90/=E6=89=A7=E8=A1=8C=E6=B5=81=E7=A8=8B?= =?UTF-8?q?=E3=80=82=E6=94=B9=E5=8A=A8=E6=A6=82=E8=A7=88=E5=A6=82=E4=B8=8B?= =?UTF-8?q?=EF=BC=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增 JSON tool_calls 解析/序列化并替换核心执行与提示词为 JSON-only:JsonToolCallParser.cs、AIIntelligenceCore.cs 工具基类移除 XML 解析,统一 JSON 参数读取与类型转换辅助:AITool.cs 工具实现统一 JSON args/UsageSchema(含重写/修复):Tool_ModifyGoodwill.cs、Tool_SendReinforcement.cs、Tool_GetMapPawns.cs、Tool_GetMapResources.cs、Tool_GetAvailablePrefabs.cs、Tool_CallPrefabAirdrop.cs、Tool_CallBombardment.cs、Tool_GetAvailableBombardments.cs、Tool_GetPawnStatus.cs、Tool_GetRecentNotifications.cs、Tool_SearchThingDef.cs、Tool_SearchPawnKind.cs、Tool_ChangeExpression.cs、Tool_SetOverwatchMode.cs、Tool_RememberFact.cs、Tool_RecallMemories.cs、Tool_SpawnResources.cs、Tool_AnalyzeScreen.cs 轰炸相关解析统一到 JSON 字典并增强数值解析:BombardmentUtility.cs UI 对话展示改为剥离 JSON tool_calls:Overlay_WulaLink.cs、Dialog_AIConversation.cs --- .../EventSystem/AI/AIIntelligenceCore.cs | 313 +-- .../EventSystem/AI/Tools/AITool.cs | 147 +- .../AI/Tools/BombardmentUtility.cs | 112 +- .../AI/Tools/Tool_AnalyzeScreen.cs | 66 +- .../AI/Tools/Tool_CallBombardment.cs | 15 +- .../AI/Tools/Tool_CallPrefabAirdrop.cs | 25 +- .../AI/Tools/Tool_ChangeExpression.cs | 22 +- .../AI/Tools/Tool_GetAvailableBombardments.cs | 2 +- .../AI/Tools/Tool_GetAvailablePrefabs.cs | 2 +- .../EventSystem/AI/Tools/Tool_GetMapPawns.cs | 17 +- .../AI/Tools/Tool_GetMapResources.cs | 16 +- .../AI/Tools/Tool_GetPawnStatus.cs | 10 +- .../AI/Tools/Tool_GetRecentNotifications.cs | 32 +- 
.../AI/Tools/Tool_ModifyGoodwill.cs | 23 +- .../AI/Tools/Tool_RecallMemories.cs | 14 +- .../EventSystem/AI/Tools/Tool_RememberFact.cs | 10 +- .../AI/Tools/Tool_SearchPawnKind.cs | 18 +- .../AI/Tools/Tool_SearchThingDef.cs | 19 +- .../AI/Tools/Tool_SendReinforcement.cs | 14 +- .../AI/Tools/Tool_SetOverwatchMode.cs | 14 +- .../AI/Tools/Tool_SpawnResources.cs | 147 +- .../AI/UI/Dialog_AIConversation.cs | 11 +- .../EventSystem/AI/UI/Overlay_WulaLink.cs | 18 +- .../AI/Utils/JsonToolCallParser.cs | 516 +++++ .../WulaAI_Gemini_Integration_Handover.md | 83 - .../WulaAI_DevDocs/deepseek/JsonOutput.md | 58 + .../WulaAI_DevDocs/deepseek/ToolCalls.md | 273 +++ .../Function-calling-with-the-Gemini-API.md | 1511 ++++++++++++++ .../google/Structured-Outputs.md | 533 +++++ .../openai/Structured-outputs.md | 1844 +++++++++++++++++ .../WulaAI_DevDocs/openai/function-calling.md | 1052 ++++++++++ llama.cpp | 1 + 32 files changed, 6396 insertions(+), 542 deletions(-) create mode 100644 Source/WulaFallenEmpire/EventSystem/AI/Utils/JsonToolCallParser.cs delete mode 100644 Source/WulaFallenEmpire/WulaAI_DevDocs/WulaAI_Gemini_Integration_Handover.md create mode 100644 Source/WulaFallenEmpire/WulaAI_DevDocs/deepseek/JsonOutput.md create mode 100644 Source/WulaFallenEmpire/WulaAI_DevDocs/deepseek/ToolCalls.md create mode 100644 Source/WulaFallenEmpire/WulaAI_DevDocs/google/Function-calling-with-the-Gemini-API.md create mode 100644 Source/WulaFallenEmpire/WulaAI_DevDocs/google/Structured-Outputs.md create mode 100644 Source/WulaFallenEmpire/WulaAI_DevDocs/openai/Structured-outputs.md create mode 100644 Source/WulaFallenEmpire/WulaAI_DevDocs/openai/function-calling.md create mode 160000 llama.cpp diff --git a/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs b/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs index 74670859..0fc49193 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/AIIntelligenceCore.cs 
@@ -10,6 +10,7 @@ using UnityEngine; using Verse; using WulaFallenEmpire; using WulaFallenEmpire.EventSystem.AI.Tools; +using WulaFallenEmpire.EventSystem.AI.Utils; namespace WulaFallenEmpire.EventSystem.AI { @@ -95,20 +96,18 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori private const string ToolRulesInstruction = @" # TOOL USE RULES -1. **FORMATTING**: Tool calls MUST use the specified XML format. The tool name is the root tag, and each parameter is a child tag. - - value - +1. **FORMATTING**: Tool calls MUST be valid JSON using the following schema: + { ""tool_calls"": [ { ""type"": ""function"", ""function"": { ""name"": ""tool_name"", ""arguments"": { ... } } } ] } 2. **STRICT OUTPUT**: - Your output MUST be either: - - One or more XML tool calls (no extra text), OR - - Exactly: + - A JSON object with ""tool_calls"" (may be empty), OR + - Exactly: { ""tool_calls"": [] } Do NOT include any natural language, explanation, markdown, or additional commentary. 3. **MULTI-REQUEST RULE**: - If the user requests multiple items or information, you MUST output ALL required tool calls in the SAME tool-phase response. - Do NOT split multi-item requests across turns. 4. **TOOLS**: You MAY call any tools listed in ""# TOOLS (AVAILABLE)"". -5. **ANTI-HALLUCINATION**: Never invent tools, parameters, defNames, coordinates, or tool results. If a tool is needed but not available, use and proceed to the next phase."; +5. **ANTI-HALLUCINATION**: Never invent tools, parameters, defNames, coordinates, or tool results. If a tool is needed but not available, output { ""tool_calls"": [] } and proceed to the next phase."; public AIIntelligenceCore(World world) : base(world) { @@ -668,11 +667,11 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori if (!toolsEnabled) { return $"{fullInstruction}\n{goodwillContext}\nIMPORTANT: You MUST reply in the following language: {language}.\n" + - "IMPORTANT: Tool calls are DISABLED in this turn. Reply in natural language only. Do NOT output any XML. " + + "IMPORTANT: Tool calls are DISABLED in this turn. Reply in natural language only. Do NOT output any tool call JSON. " + "You MAY include [EXPR:n] to set your expression (n=1-6)."; } - return $"{fullInstruction}\n{goodwillContext}\nIMPORTANT: Output XML tool calls only (or ). " + + return $"{fullInstruction}\n{goodwillContext}\nIMPORTANT: Output JSON tool calls only (or {\"tool_calls\": []}). " + $"You will produce the natural-language reply later and MUST use: {language}."; } @@ -707,7 +706,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "- modify_goodwill\n" + "- call_prefab_airdrop\n" + "- set_overwatch_mode\n" + - "If no action is required, output exactly: .\n" + + "If no action is required, output exactly: { \"tool_calls\": [] }.\n" + "Query tools exist but are disabled in this phase (not listed here).\n" : string.Empty; @@ -716,14 +715,14 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori phaseInstruction += "\n- NATIVE MULTIMODAL: A current screenshot of the game is attached to this request. You can see the game state directly. Use it to determine coordinates for visual tools or to understand the context."; if (phase == RequestPhase.ActionTools) { - phaseInstruction += "\n- VISUAL PHASE RULE: This phase is for ACTIONS only. If you want to describe the screen to the user, wait for the next phase (Reply Phase). Output XML actions only here."; + phaseInstruction += "\n- VISUAL PHASE RULE: This phase is for ACTIONS only. If you want to describe the screen to the user, wait for the next phase (Reply Phase). Output JSON tool calls only here."; } } string actionWhitelist = phase == RequestPhase.ActionTools - ? 
"ACTION PHASE VALID TAGS ONLY:\n" + - ", , , , , , , \n" + - "INVALID EXAMPLES (do NOT use now): , , , , \n" + ? "ACTION PHASE VALID TOOLS ONLY:\n" + + "spawn_resources, send_reinforcement, call_bombardment, modify_goodwill, call_prefab_airdrop, set_overwatch_mode, remember_fact\n" + + "INVALID EXAMPLES (do NOT use now): get_map_resources, analyze_screen, search_thing_def, search_pawn_kind, recall_memories\n" : string.Empty; return string.Join("\n\n", new[] @@ -755,7 +754,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori sb.AppendLine("===="); sb.AppendLine(); sb.AppendLine("# TOOLS (AVAILABLE)"); - sb.AppendLine("Use XML tool calls only, or if no tools are needed."); + sb.AppendLine("Use JSON tool calls only, or {\"tool_calls\": []} if no tools are needed."); sb.AppendLine(); foreach (var tool in available) @@ -784,62 +783,58 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori "Goal: Gather info needed for decisions.\n" + "Rules:\n" + "- You MUST NOT write any natural language to the user in this phase.\n" + - "- Output XML tool calls only, or exactly: .\n" + + "- Output JSON tool calls only, or exactly: {\"tool_calls\": []}.\n" + "- Prefer query tools (get_*/search_*).\n" + - "- CRITICAL: If the user asks for an ITEM (e.g. 'Reviver Mech Serum'), you MUST use ... to find its exact DefName. NEVER GUESS DefNames.\n" + + "- CRITICAL: If the user asks for an ITEM (e.g. 'Reviver Mech Serum'), you MUST use search_thing_def with {\"query\":\"...\"} to find its exact DefName. 
NEVER GUESS DefNames.\n" + "- You MAY call multiple tools in one response, but keep it concise.\n" + "- If the user requests multiple items or information, you MUST output ALL required tool calls in this SAME response.\n" + "- Action tools are available in PHASE 2 only; do NOT use them here.\n" + "After this phase, the game will automatically proceed to PHASE 2.\n" + - "Output: XML only.\n", + "Output: JSON only.\n", RequestPhase.ActionTools => "# PHASE 2/3 (Action Tools)\n" + "Goal: Execute in-game actions based on known info.\n" + "Rules:\n" + "- You MUST NOT write any natural language to the user in this phase.\n" + - "- Output XML tool calls only, or exactly: .\n" + + "- Output JSON tool calls only, or exactly: {\"tool_calls\": []}.\n" + "- ONLY action tools are accepted in this phase (spawn_resources, send_reinforcement, call_bombardment, modify_goodwill, call_prefab_airdrop).\n" + "- Query tools (get_*/search_*) will be ignored.\n" + "- Prefer action tools (spawn_resources, send_reinforcement, call_bombardment, modify_goodwill).\n" + "- Avoid queries unless absolutely required.\n" + - "- If no action is required based on query results, output .\n" + - "- If you already executed the needed action earlier this turn, output .\n" + + "- If no action is required based on query results, output {\"tool_calls\": []}.\n" + + "- If you already executed the needed action earlier this turn, output {\"tool_calls\": []}.\n" + "After this phase, the game will automatically proceed to PHASE 3.\n" + - "Output: XML only.\n", + "Output: JSON only.\n", RequestPhase.Reply => "# PHASE 3/3 (Reply)\n" + "Goal: Reply to the player.\n" + "Rules:\n" + "- Tool calls are DISABLED.\n" + "- You MUST write natural language only.\n" + - "- Do NOT output any XML.\n" + + "- Do NOT output any tool call JSON.\n" + "- If you want to set your expression, include: [EXPR:n] (n=1-6).\n", _ => "" }; } - private static bool IsXmlToolCall(string response) + private static bool IsToolCallJson(string 
response) { if (string.IsNullOrWhiteSpace(response)) return false; - return Regex.IsMatch(response, @"<(?!/?(i|b|color|size|material)\b)([a-zA-Z0-9_]+)(?:>.*?|/>)", RegexOptions.Singleline); + return JsonToolCallParser.TryParseToolCallsFromText(response, out _, out _); } private static bool IsNoActionOnly(string response) { - if (string.IsNullOrWhiteSpace(response)) return false; - var matches = Regex.Matches(response, @"<([a-zA-Z0-9_]+)(?:>.*?|/>)", RegexOptions.Singleline); - return matches.Count == 1 && - matches[0].Groups[1].Value.Equals("no_action", StringComparison.OrdinalIgnoreCase); + if (!JsonToolCallParser.TryParseToolCallsFromText(response, out var toolCalls, out _)) return false; + return toolCalls.Count == 0; } private static bool HasActionToolCall(string response) { - if (string.IsNullOrWhiteSpace(response)) return false; - var matches = Regex.Matches(response, @"<([a-zA-Z0-9_]+)(?:>.*?|/>)", RegexOptions.Singleline); - foreach (Match match in matches) + if (!JsonToolCallParser.TryParseToolCallsFromText(response, out var toolCalls, out _)) return false; + foreach (var call in toolCalls) { - var toolName = match.Groups[1].Value; - if (IsActionToolName(toolName)) + if (IsActionToolName(call.Name)) { return true; } @@ -850,8 +845,15 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori private static bool ShouldRetryTools(string response) { if (string.IsNullOrWhiteSpace(response)) return false; - return Regex.IsMatch(response, @"<\s*retry_tools\s*/\s*>", RegexOptions.IgnoreCase) || - Regex.IsMatch(response, @"<\s*retry_tools\s*>", RegexOptions.IgnoreCase); + if (!JsonToolCallParser.TryParseObject(response, out var obj)) return false; + if (obj.TryGetValue("retry_tools", out object raw) && raw != null) + { + if (raw is bool b) return b; + if (raw is string s && bool.TryParse(s, out bool parsed)) return parsed; + if (raw is long l) return l != 0; + if (raw is double d) return Math.Abs(d) > 0.0001; + } + return false; } private static int MaxToolsPerPhase(RequestPhase phase) @@ -981,7 +983,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori continue; } - // Revert UI filtering: Add assistant messages directly without stripping XML for history context + // Revert UI filtering: Add assistant messages directly without stripping tool call JSON for history context filtered.Add(entry); } @@ -1480,12 +1482,18 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori return value.Replace("\\", "\\\\").Replace("\"", "\\\"").Replace("\n", "\\n").Replace("\r", "\\r"); } - private static string StripXmlTags(string text) + private static string StripToolCallJson(string text) { if (string.IsNullOrEmpty(text)) return text; - string stripped = Regex.Replace(text, @"<(?!/?(i|b|color|size|material)\b)([a-zA-Z0-9_]+)[^>]*>.*?", "", RegexOptions.Singleline); - stripped = Regex.Replace(stripped, @"<([a-zA-Z0-9_]+)[^>]*/>", ""); - return stripped; + if (!JsonToolCallParser.TryParseToolCallsFromText(text, out _, out string fragment)) + { + return text; + } + + int index = text.IndexOf(fragment, StringComparison.Ordinal); + if (index < 0) return text; + string cleaned = text.Remove(index, fragment.Length); + return cleaned.Trim(); } private string StripExpressionTags(string text) @@ -1570,7 +1578,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori var client = new SimpleAIClient(apiKey, baseUrl, model, settings.useGeminiProtocol); _currentClient = client; - // Model-Driven Vision: Start with null image. The model must ask for it using or if needed. + // Model-Driven Vision: Start with null image. The model must request it using analyze_screen or capture_screen if needed. string base64Image = null; @@ -1588,16 +1596,16 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori return; } - if (!IsXmlToolCall(queryResponse)) + if (!IsToolCallJson(queryResponse)) { if (Prefs.DevMode) { - WulaLog.Debug("[WulaAI] Turn 1/3 missing XML; treating as "); + WulaLog.Debug("[WulaAI] Turn 1/3 missing JSON tool calls; treating as no_action."); } - queryResponse = ""; + queryResponse = "{\"tool_calls\": []}"; } - PhaseExecutionResult queryResult = await ExecuteXmlToolsForPhase(queryResponse, queryPhase); + PhaseExecutionResult queryResult = await ExecuteJsonToolsForPhase(queryResponse, queryPhase); // DATA FLOW: If Query Phase captured an image, propagate it to subsequent phases. if (!string.IsNullOrEmpty(queryResult.CapturedImage)) @@ -1613,9 +1621,9 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori string retryInstruction = persona + "\n\n# RETRY DECISION\n" + "No successful tool calls occurred in PHASE 1 (Query).\n" + - "If you need to use tools in PHASE 1, output exactly: .\n" + - "If you will proceed without actions, output exactly: .\n" + - "Output the XML tag only and NOTHING else.\n" + + "If you need to use tools in PHASE 1, output exactly: {\"retry_tools\": true}.\n" + + "If you will proceed without actions, output exactly: {\"retry_tools\": false}.\n" + + "Output JSON only and NOTHING else.\n" + "\nLast user request:\n" + lastUserMessage; string retryDecision = await client.GetChatCompletionAsync(retryInstruction, new List<(string role, string message)>(), maxTokens: 256, temperature: 0.1f); @@ -1628,7 +1636,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori SetThinkingPhase(1, true); string retryQueryInstruction = GetToolSystemInstruction(queryPhase, !string.IsNullOrEmpty(base64Image)) + - "\n\n# RETRY\nYou chose to retry. Output XML tool calls only (or )."; + "\n\n# RETRY\nYou chose to retry. 
Output JSON tool calls only (or {\"tool_calls\": []})."; string retryQueryResponse = await client.GetChatCompletionAsync(retryQueryInstruction, BuildToolContext(queryPhase), maxTokens: 2048, temperature: 0.1f, base64Image: base64Image); if (string.IsNullOrEmpty(retryQueryResponse)) { @@ -1636,15 +1644,15 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori return; } - if (!IsXmlToolCall(retryQueryResponse)) + if (!IsToolCallJson(retryQueryResponse)) { if (Prefs.DevMode) { - WulaLog.Debug("[WulaAI] Retry query phase missing XML; treating as "); + WulaLog.Debug("[WulaAI] Retry query phase missing JSON tool calls; treating as no_action."); } - retryQueryResponse = ""; + retryQueryResponse = "{\"tool_calls\": []}"; } - queryResult = await ExecuteXmlToolsForPhase(retryQueryResponse, queryPhase); + queryResult = await ExecuteJsonToolsForPhase(retryQueryResponse, queryPhase); } } @@ -1665,33 +1673,28 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori return; } - bool actionHasXml = IsXmlToolCall(actionResponse); - bool actionIsNoActionOnly = IsNoActionOnly(actionResponse); - bool actionHasActionTool = actionHasXml && HasActionToolCall(actionResponse); - if (!actionHasXml || (!actionHasActionTool && !actionIsNoActionOnly)) + bool actionHasJson = IsToolCallJson(actionResponse); + bool actionIsNoActionOnly = actionHasJson && IsNoActionOnly(actionResponse); + bool actionHasActionTool = actionHasJson && HasActionToolCall(actionResponse); + if (!actionHasJson || (!actionHasActionTool && !actionIsNoActionOnly)) { if (Prefs.DevMode) { - WulaLog.Debug("[WulaAI] Turn 2/3 missing XML or no action tool; attempting XML-only conversion."); + WulaLog.Debug("[WulaAI] Turn 2/3 missing JSON or no action tool; attempting JSON-only conversion."); } - string fixInstruction = "# FORMAT FIX (ACTION XML ONLY)\n" + + string fixInstruction = "# FORMAT FIX (ACTION JSON ONLY)\n" + "Preserve the intent of the previous output.\n" + - "If 
the previous output indicates no action is needed or refuses action, output exactly: .\n" + + "If the previous output indicates no action is needed or refuses action, output exactly: {\"tool_calls\": []}.\n" + "Do NOT invent new actions.\n" + - "Output VALID XML tool calls only. No natural language, no commentary.\nIgnore any non-XML text.\n" + - "Allowed tags: , , , , , .\n" + - "\nAction tool XML formats:\n" + - "- DefNameInt\n" + - "- PawnKindDef: Count, ...\n" + - "- DefNameIntInt\n" + - "- Int\n" + - "- DefNameIntInt\n" + + "Output VALID JSON tool calls only. No natural language, no commentary.\nIgnore any non-JSON text.\n" + + "Allowed tools: spawn_resources, send_reinforcement, call_bombardment, modify_goodwill, call_prefab_airdrop, set_overwatch_mode, remember_fact.\n" + + "Schema: {\"tool_calls\":[{\"type\":\"function\",\"function\":{\"name\":\"tool_name\",\"arguments\":{...}}}]}\n" + "\nPrevious output:\n" + TrimForPrompt(actionResponse, 600); string fixedResponse = await client.GetChatCompletionAsync(fixInstruction, actionContext, maxTokens: 2048, temperature: 0.1f); - bool fixedHasXml = !string.IsNullOrEmpty(fixedResponse) && IsXmlToolCall(fixedResponse); - bool fixedIsNoActionOnly = fixedHasXml && IsNoActionOnly(fixedResponse); - bool fixedHasActionTool = fixedHasXml && HasActionToolCall(fixedResponse); - if (fixedHasXml && (fixedHasActionTool || fixedIsNoActionOnly)) + bool fixedHasJson = !string.IsNullOrEmpty(fixedResponse) && IsToolCallJson(fixedResponse); + bool fixedIsNoActionOnly = fixedHasJson && IsNoActionOnly(fixedResponse); + bool fixedHasActionTool = fixedHasJson && HasActionToolCall(fixedResponse); + if (fixedHasJson && (fixedHasActionTool || fixedIsNoActionOnly)) { actionResponse = fixedResponse; } @@ -1699,12 +1702,12 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori { if (Prefs.DevMode) { - WulaLog.Debug("[WulaAI] Turn 2/3 conversion failed; treating as "); + WulaLog.Debug("[WulaAI] Turn 2/3 conversion failed; treating as no_action."); } - actionResponse = ""; + actionResponse = "{\"tool_calls\": []}"; } } - PhaseExecutionResult actionResult = await ExecuteXmlToolsForPhase(actionResponse, actionPhase); + PhaseExecutionResult actionResult = await ExecuteJsonToolsForPhase(actionResponse, actionPhase); if (!actionResult.AnyActionSuccess && !_actionRetryUsed) { _actionRetryUsed = true; @@ -1713,9 +1716,9 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori string retryInstruction = persona + "\n\n# RETRY DECISION\n" + "No successful action tools occurred in PHASE 2 (Action).\n" + - "If you need to execute an in-game action, output exactly: .\n" + - "If you will proceed without actions, output exactly: .\n" + - "Output the XML tag only and NOTHING else.\n" + + "If you need to execute an in-game action, output exactly: {\"retry_tools\": true}.\n" + + "If you will proceed without actions, output exactly: {\"retry_tools\": false}.\n" + + "Output JSON only and NOTHING else.\n" + "\nLast user request:\n" + lastUserMessage; string retryDecision = await client.GetChatCompletionAsync(retryInstruction, new List<(string role, string message)>(), maxTokens: 256, temperature: 0.1f); @@ -1728,7 +1731,7 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori SetThinkingPhase(2, true); string retryActionInstruction = GetToolSystemInstruction(actionPhase, !string.IsNullOrEmpty(base64Image)) + - "\n\n# RETRY\nYou chose to retry. Output XML tool calls only (or )."; + "\n\n# RETRY\nYou chose to retry. 
Output JSON tool calls only (or {\"tool_calls\": []})."; var retryActionContext = BuildToolContext(actionPhase, includeUser: true); string retryActionResponse = await client.GetChatCompletionAsync(retryActionInstruction, retryActionContext, maxTokens: 2048, temperature: 0.1f, base64Image: base64Image); if (string.IsNullOrEmpty(retryActionResponse)) @@ -1737,30 +1740,25 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori return; } - if (!IsXmlToolCall(retryActionResponse)) + if (!IsToolCallJson(retryActionResponse)) { if (Prefs.DevMode) { - WulaLog.Debug("[WulaAI] Retry action phase missing XML; attempting XML-only conversion."); + WulaLog.Debug("[WulaAI] Retry action phase missing JSON; attempting JSON-only conversion."); } - string retryFixInstruction = "# FORMAT FIX (ACTION XML ONLY)\n" + + string retryFixInstruction = "# FORMAT FIX (ACTION JSON ONLY)\n" + "Preserve the intent of the previous output.\n" + - "If the previous output indicates no action is needed or refuses action, output exactly: .\n" + + "If the previous output indicates no action is needed or refuses action, output exactly: {\"tool_calls\": []}.\n" + "Do NOT invent new actions.\n" + - "Output VALID XML tool calls only. No natural language, no commentary.\nIgnore any non-XML text.\n" + - "Allowed tags: , , , , , .\n" + - "\nAction tool XML formats:\n" + - "- DefNameInt\n" + - "- PawnKindDef: Count, ...\n" + - "- DefNameIntInt\n" + - "- Int\n" + - "- DefNameIntInt\n" + + "Output VALID JSON tool calls only. 
No natural language, no commentary.\nIgnore any non-JSON text.\n" + + "Allowed tools: spawn_resources, send_reinforcement, call_bombardment, modify_goodwill, call_prefab_airdrop, set_overwatch_mode, remember_fact.\n" + + "Schema: {\"tool_calls\":[{\"type\":\"function\",\"function\":{\"name\":\"tool_name\",\"arguments\":{...}}}]}\n" + "\nPrevious output:\n" + TrimForPrompt(retryActionResponse, 600); string retryFixedResponse = await client.GetChatCompletionAsync(retryFixInstruction, retryActionContext, maxTokens: 2048, temperature: 0.1f); - bool retryFixedHasXml = !string.IsNullOrEmpty(retryFixedResponse) && IsXmlToolCall(retryFixedResponse); - bool retryFixedIsNoActionOnly = retryFixedHasXml && IsNoActionOnly(retryFixedResponse); - bool retryFixedHasActionTool = retryFixedHasXml && HasActionToolCall(retryFixedResponse); - if (retryFixedHasXml && (retryFixedHasActionTool || retryFixedIsNoActionOnly)) + bool retryFixedHasJson = !string.IsNullOrEmpty(retryFixedResponse) && IsToolCallJson(retryFixedResponse); + bool retryFixedIsNoActionOnly = retryFixedHasJson && IsNoActionOnly(retryFixedResponse); + bool retryFixedHasActionTool = retryFixedHasJson && HasActionToolCall(retryFixedResponse); + if (retryFixedHasJson && (retryFixedHasActionTool || retryFixedIsNoActionOnly)) { retryActionResponse = retryFixedResponse; } @@ -1768,13 +1766,13 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori { if (Prefs.DevMode) { - WulaLog.Debug("[WulaAI] Retry action conversion failed; treating as "); + WulaLog.Debug("[WulaAI] Retry action conversion failed; treating as no_action."); } - retryActionResponse = ""; + retryActionResponse = "{\"tool_calls\": []}"; } } - actionResult = await ExecuteXmlToolsForPhase(retryActionResponse, actionPhase); + actionResult = await ExecuteJsonToolsForPhase(retryActionResponse, actionPhase); } } @@ -1826,29 +1824,29 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori return; } - bool replyHadXml = IsXmlToolCall(reply); - string strippedReply = StripXmlTags(reply)?.Trim() ?? ""; - if (replyHadXml || string.IsNullOrWhiteSpace(strippedReply)) + bool replyHadToolCalls = IsToolCallJson(reply); + string strippedReply = StripToolCallJson(reply)?.Trim() ?? ""; + if (replyHadToolCalls || string.IsNullOrWhiteSpace(strippedReply)) { string retryReplyInstruction = replyInstruction + "\n\n# RETRY (REPLY OUTPUT)\n" + - "Your last reply included XML or was empty. Tool calls are DISABLED.\n" + - "You MUST reply in natural language only. Do NOT output any XML.\n"; + "Your last reply included tool call JSON or was empty. Tool calls are DISABLED.\n" + + "You MUST reply in natural language only. Do NOT output any tool call JSON.\n"; string retryReply = await client.GetChatCompletionAsync(retryReplyInstruction, BuildReplyHistory(), maxTokens: 256, temperature: 0.3f); if (!string.IsNullOrEmpty(retryReply)) { reply = retryReply; - replyHadXml = IsXmlToolCall(reply); - strippedReply = StripXmlTags(reply)?.Trim() ?? ""; + replyHadToolCalls = IsToolCallJson(reply); + strippedReply = StripToolCallJson(reply)?.Trim() ?? ""; } } - if (replyHadXml) + if (replyHadToolCalls) { - string cleaned = StripXmlTags(reply)?.Trim() ?? ""; + string cleaned = StripToolCallJson(reply)?.Trim() ?? ""; if (string.IsNullOrWhiteSpace(cleaned)) { - cleaned = "(system) AI reply returned tool XML only and was discarded. Please retry or send /clear to reset context."; + cleaned = "(system) AI reply returned tool call JSON only and was discarded. Please retry or send /clear to reset context."; } reply = cleaned; } @@ -1866,7 +1864,7 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori SetThinkingState(false); } } - private async Task ExecuteXmlToolsForPhase(string xml, RequestPhase phase) + private async Task ExecuteJsonToolsForPhase(string json, RequestPhase phase) { if (phase == RequestPhase.Reply) { @@ -1874,14 +1872,23 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori return default; } - string guidance = "ToolRunner Guidance: Reply to the player in natural language only. Do NOT output any XML. You may include [EXPR:n] to set expression (n=1-6)."; + string guidance = "ToolRunner Guidance: Reply to the player in natural language only. Do NOT output any tool call JSON. You may include [EXPR:n] to set expression (n=1-6)."; - var matches = Regex.Matches(xml ?? "", @"<([a-zA-Z0-9_]+)(?:>.*?|/>)", RegexOptions.Singleline); - - if (matches.Count == 0 || (matches.Count == 1 && matches[0].Groups[1].Value.Equals("no_action", StringComparison.OrdinalIgnoreCase))) + if (!JsonToolCallParser.TryParseToolCallsFromText(json ?? "", out var toolCalls, out string jsonFragment)) { UpdatePhaseToolLedger(phase, false, new List()); - _history.Add(("toolcall", "")); + _history.Add(("toolcall", "{\"tool_calls\": []}")); + _history.Add(("tool", $"[Tool Results]\nTool 'no_action' Result: No action taken.\n{guidance}")); + PersistHistory(); + UpdateActionLedgerNote(); + await Task.CompletedTask; + return default; + } + + if (toolCalls.Count == 0) + { + UpdatePhaseToolLedger(phase, false, new List()); + _history.Add(("toolcall", "{\"tool_calls\": []}")); _history.Add(("tool", $"[Tool Results]\nTool 'no_action' Result: No action taken.\n{guidance}")); PersistHistory(); UpdateActionLedgerNote(); @@ -1897,43 +1904,58 @@ You are 'The Legion', a super AI of the Wula Empire. 
Your personality is authori var successfulActions = new List(); var failedActions = new List(); var nonActionToolsInActionPhase = new List(); + var historyCalls = new List>(); StringBuilder combinedResults = new StringBuilder(); - StringBuilder xmlOnlyBuilder = new StringBuilder(); string capturedImageForPhase = null; bool countActionSuccessOnly = phase == RequestPhase.ActionTools; - foreach (Match match in matches) + foreach (var call in toolCalls) { if (executed >= maxTools) { - combinedResults.AppendLine($"ToolRunner Note: Skipped remaining tools because this phase allows at most {maxTools} tool call(s)." ); + combinedResults.AppendLine($"ToolRunner Note: Skipped remaining tools because this phase allows at most {maxTools} tool call(s)."); break; } - string toolCallXml = match.Value; - string toolName = match.Groups[1].Value; - - if (toolName.Equals("no_action", StringComparison.OrdinalIgnoreCase)) + string toolName = call.Name; + if (string.IsNullOrWhiteSpace(toolName)) { - combinedResults.AppendLine("ToolRunner Note: Ignored because other tool calls were present."); + executed++; continue; } - if (toolName.Equals("analyze_screen", StringComparison.OrdinalIgnoreCase) || toolName.Equals("capture_screen", StringComparison.OrdinalIgnoreCase)) + if (string.Equals(toolName, "no_action", StringComparison.OrdinalIgnoreCase)) { - // Intercept Vision Request: Capture screen and return it. - // We skip the tool's internal execution to save time/tokens, as the purpose is just to get the image into the context. - capturedImageForPhase = ScreenCaptureUtility.CaptureScreenAsBase64(); - combinedResults.AppendLine($"Tool '{toolName}' Result: Screen captured successfully. 
Context updated for next phase."); - successfulToolCall = true; - successfulTools.Add(toolName); - executed++; - continue; + combinedResults.AppendLine("ToolRunner Note: Ignored 'no_action' tool because other tool calls were present."); + executed++; + continue; } - if (xmlOnlyBuilder.Length > 0) xmlOnlyBuilder.AppendLine().AppendLine(); - xmlOnlyBuilder.Append(toolCallXml); + var historyCall = new Dictionary + { + ["type"] = "function", + ["function"] = new Dictionary + { + ["name"] = toolName, + ["arguments"] = call.Arguments ?? new Dictionary(StringComparer.OrdinalIgnoreCase) + } + }; + if (!string.IsNullOrWhiteSpace(call.Id)) + { + historyCall["id"] = call.Id; + } + historyCalls.Add(historyCall); + + if (toolName.Equals("analyze_screen", StringComparison.OrdinalIgnoreCase) || toolName.Equals("capture_screen", StringComparison.OrdinalIgnoreCase)) + { + capturedImageForPhase = ScreenCaptureUtility.CaptureScreenAsBase64(); + combinedResults.AppendLine($"Tool '{toolName}' Result: Screen captured successfully. Context updated for next phase."); + successfulToolCall = true; + successfulTools.Add(toolName); + executed++; + continue; + } if (phase == RequestPhase.ActionTools && IsQueryToolName(toolName)) { @@ -1952,19 +1974,13 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori continue; } - string argsXml = toolCallXml; - var contentMatch = Regex.Match(toolCallXml, $@"<{toolName}>(.*?)", RegexOptions.Singleline); - if (contentMatch.Success) - { - argsXml = contentMatch.Groups[1].Value; - } - + string argsJson = call.ArgumentsJson ?? 
"{}"; if (Prefs.DevMode) { - WulaLog.Debug($"[WulaAI] Executing tool (phase {phase}): {toolName} with args: {argsXml}"); + WulaLog.Debug($"[WulaAI] Executing tool (phase {phase}): {toolName} with args: {argsJson}"); } - string result = (await tool.ExecuteAsync(argsXml)).Trim(); + string result = (await tool.ExecuteAsync(argsJson)).Trim(); bool isError = !string.IsNullOrEmpty(result) && result.StartsWith("Error:", StringComparison.OrdinalIgnoreCase); if (toolName == "modify_goodwill") { @@ -2009,10 +2025,9 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori executed++; } - string nonXmlText = StripXmlTags(xml); - if (!string.IsNullOrWhiteSpace(nonXmlText)) + if (!string.IsNullOrWhiteSpace(jsonFragment) && !string.Equals((json ?? "").Trim(), jsonFragment, StringComparison.Ordinal)) { - combinedResults.AppendLine("ToolRunner Note: Non-XML text in the tool phase was ignored."); + combinedResults.AppendLine("ToolRunner Note: Non-JSON text in the tool phase was ignored."); } if (phase == RequestPhase.ActionTools && nonActionToolsInActionPhase.Count > 0) { @@ -2032,8 +2047,10 @@ You are 'The Legion', a super AI of the Wula Empire. Your personality is authori } combinedResults.AppendLine(guidance); - string xmlOnly = xmlOnlyBuilder.Length == 0 ? "" : xmlOnlyBuilder.ToString().Trim(); - _history.Add(("toolcall", xmlOnly)); + string toolCallsJson = historyCalls.Count == 0 + ? 
"{\"tool_calls\": []}" + : JsonToolCallParser.SerializeToJson(new Dictionary { ["tool_calls"] = historyCalls }); + _history.Add(("toolcall", toolCallsJson)); _history.Add(("tool", $"[Tool Results]\n{combinedResults.ToString().Trim()}")); PersistHistory(); diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/AITool.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/AITool.cs index e51fc7a9..a4cae037 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/AITool.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/AITool.cs @@ -1,8 +1,10 @@ using System; using System.Collections.Generic; +using System.Globalization; using System.Text.RegularExpressions; using System.Threading.Tasks; using Verse; +using WulaFallenEmpire.EventSystem.AI.Utils; namespace WulaFallenEmpire.EventSystem.AI.Tools { @@ -10,34 +12,141 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public abstract string Name { get; } public abstract string Description { get; } - public abstract string UsageSchema { get; } // XML schema description + public abstract string UsageSchema { get; } // JSON schema description public virtual string Execute(string args) => "Error: Synchronous execution not supported for this tool."; public virtual Task ExecuteAsync(string args) => Task.FromResult(Execute(args)); /// - /// Helper method to parse XML arguments into a dictionary. - /// Supports simple tags and CDATA blocks. + /// Helper method to parse JSON arguments into a dictionary. 
/// - protected Dictionary ParseXmlArgs(string xml) + protected Dictionary ParseJsonArgs(string json) { - var argsDict = new Dictionary(); - if (string.IsNullOrEmpty(xml)) return argsDict; - - // Regex to match value or - // Group 1: Tag name - // Group 2: CDATA value - // Group 3: Simple value - var paramMatches = Regex.Matches(xml, @"<([a-zA-Z0-9_]+)>(?:|(.*?))", RegexOptions.Singleline); - - foreach (Match match in paramMatches) + var argsDict = new Dictionary(StringComparer.OrdinalIgnoreCase); + if (string.IsNullOrWhiteSpace(json)) return argsDict; + if (JsonToolCallParser.TryParseObject(json, out Dictionary parsed)) { - string key = match.Groups[1].Value; - string value = match.Groups[2].Success ? match.Groups[2].Value : match.Groups[3].Value; - argsDict[key] = value; + return parsed; } - return argsDict; } + + protected static bool TryGetString(Dictionary args, string key, out string value) + { + value = null; + if (args == null || string.IsNullOrWhiteSpace(key)) return false; + if (args.TryGetValue(key, out object raw) && raw != null) + { + value = Convert.ToString(raw, CultureInfo.InvariantCulture); + return !string.IsNullOrWhiteSpace(value); + } + return false; + } + + protected static bool TryGetInt(Dictionary args, string key, out int value) + { + value = 0; + if (!TryGetNumber(args, key, out double number)) return false; + value = (int)Math.Round(number); + return true; + } + + protected static bool TryGetFloat(Dictionary args, string key, out float value) + { + value = 0f; + if (!TryGetNumber(args, key, out double number)) return false; + value = (float)number; + return true; + } + + protected static bool TryGetBool(Dictionary args, string key, out bool value) + { + value = false; + if (args == null || string.IsNullOrWhiteSpace(key)) return false; + if (!args.TryGetValue(key, out object raw) || raw == null) return false; + if (raw is bool b) + { + value = b; + return true; + } + if (raw is string s && bool.TryParse(s, out bool parsed)) + { + value = 
parsed; + return true; + } + if (raw is long l) + { + value = l != 0; + return true; + } + if (raw is double d) + { + value = Math.Abs(d) > 0.0001; + return true; + } + return false; + } + + protected static bool TryGetObject(Dictionary args, string key, out Dictionary value) + { + value = null; + if (args == null || string.IsNullOrWhiteSpace(key)) return false; + if (args.TryGetValue(key, out object raw) && raw is Dictionary dict) + { + value = dict; + return true; + } + return false; + } + + protected static bool TryGetList(Dictionary args, string key, out List value) + { + value = null; + if (args == null || string.IsNullOrWhiteSpace(key)) return false; + if (args.TryGetValue(key, out object raw) && raw is List list) + { + value = list; + return true; + } + return false; + } + + protected static bool LooksLikeJson(string input) + { + return JsonToolCallParser.LooksLikeJson(input); + } + + private static bool TryGetNumber(Dictionary args, string key, out double value) + { + value = 0; + if (args == null || string.IsNullOrWhiteSpace(key)) return false; + if (!args.TryGetValue(key, out object raw) || raw == null) return false; + if (raw is double d) + { + value = d; + return true; + } + if (raw is float f) + { + value = f; + return true; + } + if (raw is int i) + { + value = i; + return true; + } + if (raw is long l) + { + value = l; + return true; + } + if (raw is string s && double.TryParse(s, NumberStyles.Float, CultureInfo.InvariantCulture, out double parsed)) + { + value = parsed; + return true; + } + return false; + } } -} \ No newline at end of file +} diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/BombardmentUtility.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/BombardmentUtility.cs index 05e467c4..a7a0d703 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/BombardmentUtility.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/BombardmentUtility.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using 
System.Globalization; using System.Linq; using RimWorld; using UnityEngine; @@ -10,12 +11,12 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public static class BombardmentUtility { - public static string ExecuteCircularBombardment(Map map, IntVec3 targetCell, AbilityDef def, CompProperties_AbilityCircularBombardment props, Dictionary parsed = null) + public static string ExecuteCircularBombardment(Map map, IntVec3 targetCell, AbilityDef def, CompProperties_AbilityCircularBombardment props, Dictionary parsed = null) { if (props.skyfallerDef == null) return $"Error: '{def.defName}' has no skyfallerDef."; bool filter = true; - if (parsed != null && parsed.TryGetValue("filterFriendlyFire", out var ffStr) && bool.TryParse(ffStr, out bool ff)) filter = ff; + if (TryGetBool(parsed, "filterFriendlyFire", out bool ff)) filter = ff; List selectedTargets = SelectTargetCells(map, targetCell, props, filter); if (selectedTargets.Count == 0) return $"Error: No valid target cells near {targetCell}."; @@ -26,7 +27,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools return $"Success: Scheduled Circular Bombardment '{def.defName}' at {targetCell}. Launches: {totalLaunches}/{props.maxLaunches}."; } - public static string ExecuteStrafeBombardment(Map map, IntVec3 targetCell, AbilityDef def, CompProperties_AbilityBombardment props, Dictionary parsed = null) + public static string ExecuteStrafeBombardment(Map map, IntVec3 targetCell, AbilityDef def, CompProperties_AbilityBombardment props, Dictionary parsed = null) { if (props.skyfallerDef == null) return $"Error: '{def.defName}' has no skyfallerDef."; @@ -101,11 +102,11 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools // To simplify, let's just copy the core logic or create a private helper that takes explicit args. // Actually, the main method parses direction from 'parsed'. // Let's make a Dictionary to pass to it. 
- var dict = new Dictionary { { "angle", angle.ToString() } }; - return ExecuteStrafeBombardment(map, targetCell, def, props, dict); + var dict = new Dictionary { { "angle", angle } }; + return ExecuteStrafeBombardment(map, targetCell, def, props, dict); } - public static string ExecuteEnergyLance(Map map, IntVec3 targetCell, AbilityDef def, CompProperties_AbilityEnergyLance props, Dictionary parsed = null) + public static string ExecuteEnergyLance(Map map, IntVec3 targetCell, AbilityDef def, CompProperties_AbilityEnergyLance props, Dictionary parsed = null) { ThingDef lanceDef = props.energyLanceDef ?? DefDatabase.GetNamedSilentFail("EnergyLance"); if (lanceDef == null) return $"Error: Could not resolve EnergyLance ThingDef for '{def.defName}'."; @@ -135,7 +136,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools public static string ExecuteEnergyLanceDirect(Map map, IntVec3 targetCell, AbilityDef def, CompProperties_AbilityEnergyLance props, float angle) { - var dict = new Dictionary { { "angle", angle.ToString() } }; + var dict = new Dictionary { { "angle", angle } }; return ExecuteEnergyLance(map, targetCell, def, props, dict); } @@ -166,7 +167,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools // --- Helpers --- - private static void ParseDirectionInfo(Dictionary parsed, IntVec3 startPos, float moveDistance, bool useFixedDistance, out Vector3 direction, out IntVec3 endPos) + private static void ParseDirectionInfo(Dictionary parsed, IntVec3 startPos, float moveDistance, bool useFixedDistance, out Vector3 direction, out IntVec3 endPos) { direction = Vector3.forward; endPos = startPos; @@ -178,7 +179,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools return; } - if (parsed.TryGetValue("angle", out var angleStr) && float.TryParse(angleStr, out float angle)) + if (TryGetFloat(parsed, "angle", out float angle)) { direction = Quaternion.AngleAxis(angle, Vector3.up) * Vector3.forward; endPos = (startPos.ToVector3() + direction * moveDistance).ToIntVec3(); @@ 
-204,19 +205,18 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } } - private static bool TryParseDirectionCell(Dictionary parsed, out IntVec3 cell) + private static bool TryParseDirectionCell(Dictionary parsed, out IntVec3 cell) { cell = IntVec3.Invalid; if (parsed == null) return false; - if (parsed.TryGetValue("dirX", out var xStr) && parsed.TryGetValue("dirZ", out var zStr) && - int.TryParse(xStr, out int x) && int.TryParse(zStr, out int z)) + if (TryGetInt(parsed, "dirX", out int x) && TryGetInt(parsed, "dirZ", out int z)) { cell = new IntVec3(x, 0, z); return true; } - if (parsed.TryGetValue("direction", out var dirStr) && !string.IsNullOrWhiteSpace(dirStr)) + if (TryGetString(parsed, "direction", out var dirStr) && !string.IsNullOrWhiteSpace(dirStr)) { var parts = dirStr.Split(new[] { ',', ' ' }, StringSplitOptions.RemoveEmptyEntries); if (parts.Length >= 2 && int.TryParse(parts[0], out int dx) && int.TryParse(parts[1], out int dz)) @@ -425,5 +425,91 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools return sortedRows; } + + private static bool TryGetString(Dictionary parsed, string key, out string value) + { + value = null; + if (parsed == null || string.IsNullOrWhiteSpace(key)) return false; + if (!parsed.TryGetValue(key, out object raw) || raw == null) return false; + value = Convert.ToString(raw, CultureInfo.InvariantCulture); + return !string.IsNullOrWhiteSpace(value); + } + + private static bool TryGetInt(Dictionary parsed, string key, out int value) + { + value = 0; + if (!TryGetNumber(parsed, key, out double number)) return false; + value = (int)Math.Round(number); + return true; + } + + private static bool TryGetFloat(Dictionary parsed, string key, out float value) + { + value = 0f; + if (!TryGetNumber(parsed, key, out double number)) return false; + value = (float)number; + return true; + } + + private static bool TryGetBool(Dictionary parsed, string key, out bool value) + { + value = false; + if (parsed == null || 
string.IsNullOrWhiteSpace(key)) return false; + if (!parsed.TryGetValue(key, out object raw) || raw == null) return false; + if (raw is bool b) + { + value = b; + return true; + } + if (raw is string s && bool.TryParse(s, out bool parsedBool)) + { + value = parsedBool; + return true; + } + if (raw is long l) + { + value = l != 0; + return true; + } + if (raw is double d) + { + value = Math.Abs(d) > 0.0001; + return true; + } + return false; + } + + private static bool TryGetNumber(Dictionary parsed, string key, out double value) + { + value = 0; + if (parsed == null || string.IsNullOrWhiteSpace(key)) return false; + if (!parsed.TryGetValue(key, out object raw) || raw == null) return false; + if (raw is double d) + { + value = d; + return true; + } + if (raw is float f) + { + value = f; + return true; + } + if (raw is int i) + { + value = i; + return true; + } + if (raw is long l) + { + value = l; + return true; + } + if (raw is string s && double.TryParse(s, NumberStyles.Float, CultureInfo.InvariantCulture, out double parsedNum)) + { + value = parsedNum; + return true; + } + return false; + } } } diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_AnalyzeScreen.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_AnalyzeScreen.cs index 2b0570ca..a1fc6edd 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_AnalyzeScreen.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_AnalyzeScreen.cs @@ -1,23 +1,23 @@ using System; +using System.Collections.Generic; using System.Threading.Tasks; namespace WulaFallenEmpire.EventSystem.AI.Tools { /// - /// VLM 视觉分析工具 - 截取游戏屏幕并使用视觉语言模型分析 + /// VLM visual analysis tool. 
/// public class Tool_AnalyzeScreen : AITool { public override string Name => "analyze_screen"; - - public override string Description => - "分析当前游戏屏幕截图。你可以提供具体的指令(instruction)告诉视觉模型你需要观察什么、寻找什么、或者如何描述屏幕。"; - - public override string UsageSchema => - "给视觉模型的具体指令。例如:'找到科研按钮的比例坐标' 或 '描述当前角色的健康状态栏内容'"; - - private const string BaseVisionSystemPrompt = "你是一个专业的老练 RimWorld 助手。你会根据指示分析屏幕截图。保持回答专业且简洁。不要输出 XML 标签,除非被明确要求。"; - + + public override string Description => + "Analyze the current game screen screenshot. Provide an instruction to guide the analysis."; + + public override string UsageSchema => "{\"instruction\":\"Describe the current screen\"}"; + + private const string BaseVisionSystemPrompt = "You are a seasoned RimWorld assistant. Analyze the screenshot per instruction. Keep replies concise. Do not output tool call JSON unless explicitly asked."; + public override async Task ExecuteAsync(string args) { try @@ -27,47 +27,43 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools catch (Exception ex) { WulaLog.Debug($"[Tool_AnalyzeScreen] Execute error: {ex}"); - return $"视觉分析出错: {ex.Message}"; + return $"Vision analysis error: {ex.Message}"; } } - private async Task ExecuteInternalAsync(string xmlContent) + private async Task ExecuteInternalAsync(string jsonContent) { - var argsDict = ParseXmlArgs(xmlContent); - // 优先使用 instruction,兼容旧的 context 参数 - string instruction = argsDict.TryGetValue("instruction", out var inst) ? inst : - (argsDict.TryGetValue("context", out var ctx) ? ctx : "描述当前屏幕内容,重点关注 UI 状态和重要实体。"); - + var argsDict = ParseJsonArgs(jsonContent); + string instruction = TryGetString(argsDict, "instruction", out var inst) ? inst : + (TryGetString(argsDict, "context", out var ctx) ? 
ctx : "Describe the current screen, focusing on UI state and key entities."); + try { - // 检查 VLM 配置 + // Check VLM settings var settings = WulaFallenEmpireMod.settings; if (settings == null) { - return "Mod 设置未初始化。"; + return "Mod settings not initialized."; } - - // 根据协议选择配置 + string vlmApiKey = settings.useGeminiProtocol ? settings.geminiApiKey : settings.apiKey; string vlmBaseUrl = settings.useGeminiProtocol ? settings.geminiBaseUrl : settings.baseUrl; string vlmModel = settings.useGeminiProtocol ? settings.geminiModel : settings.model; - + if (string.IsNullOrEmpty(vlmApiKey)) { - return "API 密钥未配置。请在 Mod 设置中配置。"; + return "API key not configured. Please configure it in Mod settings."; } - - // 截取屏幕 + string base64Image = ScreenCaptureUtility.CaptureScreenAsBase64(); if (string.IsNullOrEmpty(base64Image)) { - return "截屏失败,无法分析屏幕。"; + return "Screenshot capture failed; cannot analyze screen."; } - - // 调用 VLM API (使用统一的 GetChatCompletionAsync) + var client = new SimpleAIClient(vlmApiKey, vlmBaseUrl, vlmModel, settings.useGeminiProtocol); - - var messages = new System.Collections.Generic.List<(string role, string message)> + + var messages = new List<(string role, string message)> { ("user", instruction) }; @@ -79,18 +75,18 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools temperature: 0.2f, base64Image: base64Image ); - + if (string.IsNullOrEmpty(result)) { - return "VLM 分析无响应,请检查 API 配置。"; + return "Vision analysis produced no response. 
Check API settings."; } - - return $"屏幕分析结果: {result.Trim()}"; + + return $"Screen analysis result: {result.Trim()}"; } catch (Exception ex) { WulaLog.Debug($"[Tool_AnalyzeScreen] Error: {ex}"); - return $"视觉分析出错: {ex.Message}"; + return $"Vision analysis error: {ex.Message}"; } } } diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_CallBombardment.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_CallBombardment.cs index 4beeef98..d85f79f3 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_CallBombardment.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_CallBombardment.cs @@ -13,21 +13,21 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "call_bombardment"; public override string Description => "Calls orbital bombardment/support using an AbilityDef configuration (e.g., WULA_Firepower_Cannon_Salvo, WULA_Firepower_EnergyLance_Strafe). Supports Circular Bombardment, Strafe, Energy Lance, and Surveillance."; - public override string UsageSchema => "stringintintx,zx,z (optional)degrees (optional)true/false"; + public override string UsageSchema => "{\"abilityDef\":\"WULA_Firepower_Cannon_Salvo\",\"x\":12,\"z\":34,\"direction\":\"20,30\",\"angle\":90,\"filterFriendlyFire\":true}"; public override string Execute(string args) { try { - var parsed = ParseXmlArgs(args); + var parsed = ParseJsonArgs(args); - string abilityDefName = parsed.TryGetValue("abilityDef", out var abilityStr) && !string.IsNullOrWhiteSpace(abilityStr) + string abilityDefName = TryGetString(parsed, "abilityDef", out var abilityStr) && !string.IsNullOrWhiteSpace(abilityStr) ? abilityStr.Trim() : "WULA_Firepower_Cannon_Salvo"; if (!TryParseTargetCell(parsed, out var targetCell)) { - return "Error: Missing target coordinates. Provide and (or x,z)."; + return "Error: Missing target coordinates. 
Provide 'x' and 'z' (or 'cell' formatted as 'x,z')."; } Map map = Find.CurrentMap; @@ -58,18 +58,17 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } } - private static bool TryParseTargetCell(Dictionary parsed, out IntVec3 cell) + private static bool TryParseTargetCell(Dictionary parsed, out IntVec3 cell) { cell = IntVec3.Invalid; - if (parsed.TryGetValue("x", out var xStr) && parsed.TryGetValue("z", out var zStr) && - int.TryParse(xStr, out int x) && int.TryParse(zStr, out int z)) + if (TryGetInt(parsed, "x", out int x) && TryGetInt(parsed, "z", out int z)) { cell = new IntVec3(x, 0, z); return true; } - if (parsed.TryGetValue("cell", out var cellStr) && !string.IsNullOrWhiteSpace(cellStr)) + if (TryGetString(parsed, "cell", out var cellStr) && !string.IsNullOrWhiteSpace(cellStr)) { var parts = cellStr.Split(new[] { ',', '\uFF0C', ' ' }, StringSplitOptions.RemoveEmptyEntries); if (parts.Length >= 2 && int.TryParse(parts[0], out int cx) && int.TryParse(parts[1], out int cz)) diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_CallPrefabAirdrop.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_CallPrefabAirdrop.cs index 78e2e536..a9ec9a01 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_CallPrefabAirdrop.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_CallPrefabAirdrop.cs @@ -16,26 +16,25 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools "You must specify the prefabDefName (e.g., 'WULA_NewColonyBase') and the coordinates (x, z). " + "TIP: Use the 'get_available_prefabs' tool first to see which structures are available. 
" + "The default skyfaller animation is 'WULA_Prefab_Incoming'."; - public override string UsageSchema => "DefName of the prefabOptional, default is WULA_Prefab_Incomingintint"; + public override string UsageSchema => "{\"prefabDefName\":\"WULA_NewColonyBase\",\"skyfallerDef\":\"WULA_Prefab_Incoming\",\"x\":10,\"z\":20}"; public override string Execute(string args) { try { - var parsed = ParseXmlArgs(args); + var parsed = ParseJsonArgs(args); - if (!parsed.TryGetValue("prefabDefName", out string prefabDefName) || string.IsNullOrWhiteSpace(prefabDefName)) + if (!TryGetString(parsed, "prefabDefName", out string prefabDefName) || string.IsNullOrWhiteSpace(prefabDefName)) { - return "Error: Missing . Example: WULA_NewColonyBase"; + return "Error: Missing 'prefabDefName'."; } - if (!parsed.TryGetValue("x", out string xStr) || !int.TryParse(xStr, out int x) || - !parsed.TryGetValue("z", out string zStr) || !int.TryParse(zStr, out int z)) + if (!TryGetInt(parsed, "x", out int x) || !TryGetInt(parsed, "z", out int z)) { - return "Error: Missing or invalid target coordinates. Provide and ."; + return "Error: Missing or invalid target coordinates. Provide 'x' and 'z'."; } - string skyfallerDefName = parsed.TryGetValue("skyfallerDef", out string sd) && !string.IsNullOrWhiteSpace(sd) + string skyfallerDefName = TryGetString(parsed, "skyfallerDef", out string sd) && !string.IsNullOrWhiteSpace(sd) ? sd.Trim() : "WULA_Prefab_Incoming"; @@ -62,7 +61,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools // Auto-Scan for valid position IntVec3 validCell = targetCell; bool foundSpot = false; - + // Get prefab size from its size field. 
If not set, default to 1x1 (though prefabs are usually larger) IntVec2 size = prefabDef.size; @@ -70,7 +69,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools bool IsPositionValid(IntVec3 center, Map m, IntVec2 s) { if (!center.InBounds(m)) return false; - + CellRect rect = GenAdj.OccupiedRect(center, Rot4.North, s); if (!rect.InBounds(m)) return false; @@ -97,7 +96,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } else { - // Spiral scan for a nearby valid spot. + // Spiral scan for a nearby valid spot. // Radius ~20 should be enough to find a spot without deviating too far. foreach (IntVec3 c in GenRadial.RadialCellsAround(targetCell, 20f, useCenter: false)) { @@ -112,12 +111,12 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools if (!foundSpot) { - return $"Error: Could not find a valid clear space for '{prefabDefName}' (Size: {size.x}x{size.z}) near {targetCell}. Area may be blocked by thick roofs, water, or other buildings."; + return $"Error: Could not find a valid clear space for '{prefabDefName}' (Size: {size.x}x{size.z}) near {targetCell}. 
Area may be blocked by thick roofs, water, or other buildings."; } // Spawning must happen on main thread string resultMessage = $"Success: Scheduled airdrop for '{prefabDefName}' at valid position {validCell} (adjusted from {targetCell}) using {skyfallerDefName}."; - + // Use the found valid cell string pDef = prefabDefName; ThingDef sDef = skyfallerDef; diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_ChangeExpression.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_ChangeExpression.cs index 6234d51b..b1d80cc9 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_ChangeExpression.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_ChangeExpression.cs @@ -8,31 +8,27 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "change_expression"; public override string Description => "Changes your visual expression/portrait to match your current mood or reaction."; - public override string UsageSchema => "int (1-6)"; + public override string UsageSchema => "{\"expression_id\": 2}"; public override string Execute(string args) { try { - var parsedArgs = ParseXmlArgs(args); + var parsedArgs = ParseJsonArgs(args); int id = 0; - if (parsedArgs.TryGetValue("expression_id", out string idStr)) + if (TryGetInt(parsedArgs, "expression_id", out id)) { - if (int.TryParse(idStr, out id)) + var core = AIIntelligenceCore.Instance; + if (core != null) { - var core = AIIntelligenceCore.Instance; - if (core != null) - { - core.SetPortrait(id); - return $"Expression changed to {id}."; - } - return "Error: AI Core not found."; + core.SetPortrait(id); + return $"Expression changed to {id}."; } - return "Error: Invalid arguments. 
'expression_id' must be an integer."; + return "Error: AI Core not found."; } - return "Error: Missing parameter."; + return "Error: Missing 'expression_id' parameter."; } catch (Exception ex) { diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetAvailableBombardments.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetAvailableBombardments.cs index 1f4a56c2..579ca2fb 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetAvailableBombardments.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetAvailableBombardments.cs @@ -12,7 +12,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools public override string Name => "get_available_bombardments"; public override string Description => "Returns a list of available orbital bombardment abilities (AbilityDefs) that can be called. " + "Use this to find the correct 'abilityDef' for the 'call_bombardment' tool."; - public override string UsageSchema => ""; + public override string UsageSchema => "{}"; public override string Execute(string args) { diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetAvailablePrefabs.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetAvailablePrefabs.cs index ba72f370..58729ebe 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetAvailablePrefabs.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetAvailablePrefabs.cs @@ -12,7 +12,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools public override string Name => "get_available_prefabs"; public override string Description => "Returns a list of available building prefabs (blueprints) that can be summoned. 
" + "Use this to find the correct 'prefabDefName' for the 'call_prefab_airdrop' tool."; - public override string UsageSchema => ""; + public override string UsageSchema => "{}"; public override string Execute(string args) { diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetMapPawns.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetMapPawns.cs index 3efffd79..73c41427 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetMapPawns.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetMapPawns.cs @@ -12,7 +12,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools public override string Name => "get_map_pawns"; public override string Description => "Scans the current map and lists pawns (including corpses). Supports filtering by relation (friendly/hostile/neutral), type (colonist/animal/mech/humanlike), and status (prisoner/slave/guest/wild/downed/dead)."; public override string UsageSchema => - "string (optional, comma-separated: friendly, hostile, neutral, colonist, animal, mech, humanlike, prisoner, slave, guest, wild, downed, dead)true/false (optional, default true)int (optional, default 50)"; + "{\"filter\":\"friendly,hostile,colonist\",\"includeDead\":true,\"maxResults\":50}"; private struct MapPawnEntry { @@ -25,22 +25,16 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { try { - var parsed = ParseXmlArgs(args); + var parsed = ParseJsonArgs(args); string filterRaw = null; - if (parsed.TryGetValue("filter", out string f)) filterRaw = f; + if (TryGetString(parsed, "filter", out string f)) filterRaw = f; int maxResults = 50; - if (parsed.TryGetValue("maxResults", out string maxStr) && int.TryParse(maxStr, out int mr)) - { - maxResults = Math.Max(1, Math.Min(200, mr)); - } + if (TryGetInt(parsed, "maxResults", out int mr)) maxResults = Math.Max(1, Math.Min(200, mr)); bool includeDead = true; - if (parsed.TryGetValue("includeDead", out string includeDeadStr) && bool.TryParse(includeDeadStr, out bool parsedIncludeDead)) - { 
- includeDead = parsedIncludeDead; - } + if (TryGetBool(parsed, "includeDead", out bool parsedIncludeDead)) includeDead = parsedIncludeDead; Map map = Find.CurrentMap; if (map == null) return "Error: No active map."; @@ -236,4 +230,3 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } } } - diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetMapResources.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetMapResources.cs index 9dad7361..1fba60eb 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetMapResources.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetMapResources.cs @@ -12,7 +12,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "get_map_resources"; public override string Description => "Checks the player's map for specific resources or buildings. Use this to verify if the player is truly lacking something they requested (e.g., 'we need steel'). Returns inventory count and mineable deposits."; - public override string UsageSchema => "string (optional, e.g., 'Steel')"; + public override string UsageSchema => "{\"resourceName\":\"Steel\"}"; public override string Execute(string args) { @@ -22,18 +22,14 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools if (map == null) return "Error: No active map."; string resourceName = ""; - var parsedArgs = ParseXmlArgs(args); - if (parsedArgs.TryGetValue("resourceName", out string resName)) + var parsedArgs = ParseJsonArgs(args); + if (TryGetString(parsedArgs, "resourceName", out string resName)) { resourceName = resName; } - else + else if (!LooksLikeJson(args)) { - // Fallback - if (!args.Trim().StartsWith("<")) - { - resourceName = args; - } + resourceName = args; } StringBuilder sb = new StringBuilder(); @@ -106,4 +102,4 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } } } -} \ No newline at end of file +} diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetPawnStatus.cs 
b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetPawnStatus.cs index 7afcd805..d5fc516a 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetPawnStatus.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetPawnStatus.cs @@ -11,16 +11,16 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "get_pawn_status"; public override string Description => "Returns detailed status (health, needs, gear) of specified pawns. Use this to check for sickness, injuries, mood, or equipment. Can filter by name, category (colonist/animal/prisoner/guest), or status (sick/injured)."; - public override string UsageSchema => "optional_partial_namecolonist/animal/prisoner/guest/all (default: all)sick/injured/downed/dead (optional)"; + public override string UsageSchema => "{\"name\":\"optional\",\"category\":\"colonist\",\"filter\":\"sick\"}"; public override string Execute(string args) { try { - var parsed = ParseXmlArgs(args); - string nameTarget = parsed.TryGetValue("name", out string n) ? n.ToLower() : null; - string category = parsed.TryGetValue("category", out string c) ? c.ToLower() : "all"; - string filter = parsed.TryGetValue("filter", out string f) ? f.ToLower() : null; + var parsed = ParseJsonArgs(args); + string nameTarget = TryGetString(parsed, "name", out string n) ? n.ToLower() : null; + string category = TryGetString(parsed, "category", out string c) ? c.ToLower() : "all"; + string filter = TryGetString(parsed, "filter", out string f) ? 
f.ToLower() : null; Map map = Find.CurrentMap; if (map == null) return "Error: No active map."; diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetRecentNotifications.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetRecentNotifications.cs index d5e2016d..d121d45a 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetRecentNotifications.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_GetRecentNotifications.cs @@ -7,6 +7,7 @@ using System.Text; using Verse; using System.Text.RegularExpressions; using WulaFallenEmpire.EventSystem.AI; +using WulaFallenEmpire.EventSystem.AI.Utils; namespace WulaFallenEmpire.EventSystem.AI.Tools { @@ -15,7 +16,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools public override string Name => "get_recent_notifications"; public override string Description => "Returns the most recent letters and messages, sorted by in-game time from newest to oldest."; public override string UsageSchema => - "int (optional, default 10, max 100)true/false (optional, default true)true/false (optional, default true)"; + "{\"count\":10,\"includeLetters\":true,\"includeMessages\":true}"; private struct NotificationEntry { @@ -33,21 +34,10 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools bool includeLetters = true; bool includeMessages = true; - var parsed = ParseXmlArgs(args); - if (parsed.TryGetValue("count", out var countStr) && int.TryParse(countStr, out int parsedCount)) - { - count = parsedCount; - } - - if (parsed.TryGetValue("includeLetters", out var incLettersStr) && bool.TryParse(incLettersStr, out bool parsedLetters)) - { - includeLetters = parsedLetters; - } - - if (parsed.TryGetValue("includeMessages", out var incMessagesStr) && bool.TryParse(incMessagesStr, out bool parsedMessages)) - { - includeMessages = parsedMessages; - } + var parsed = ParseJsonArgs(args); + if (TryGetInt(parsed, "count", out int parsedCount)) count = parsedCount; + if (TryGetBool(parsed, "includeLetters", out bool 
parsedLetters)) includeLetters = parsedLetters; + if (TryGetBool(parsed, "includeMessages", out bool parsedMessages)) includeMessages = parsedMessages; count = Math.Max(1, Math.Min(100, count)); @@ -116,7 +106,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools var history = core.GetHistorySnapshot(); if (history == null || history.Count == 0) return "AI Tool History: none found."; - var entries = new List<(string ToolXml, string ToolResult)>(); + var entries = new List<(string ToolJson, string ToolResult)>(); for (int i = history.Count - 1; i >= 0; i--) { var entry = history[i]; @@ -126,7 +116,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools for (int j = i - 1; j >= 0; j--) { var prev = history[j]; - if (string.Equals(prev.role, "toolcall", StringComparison.OrdinalIgnoreCase) && IsXmlToolCall(prev.message)) + if (string.Equals(prev.role, "toolcall", StringComparison.OrdinalIgnoreCase) && IsToolCallJson(prev.message)) { entries.Add((prev.message ?? "", toolResult)); i = j; @@ -143,17 +133,17 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools for (int i = 0; i < entries.Count; i++) { if (i > 0) sb.AppendLine(); - sb.AppendLine(entries[i].ToolXml.Trim()); + sb.AppendLine(entries[i].ToolJson.Trim()); sb.AppendLine(entries[i].ToolResult.Trim()); } return sb.ToString().TrimEnd(); } - private static bool IsXmlToolCall(string response) + private static bool IsToolCallJson(string response) { if (string.IsNullOrWhiteSpace(response)) return false; - return Regex.IsMatch(response, @"<([a-zA-Z0-9_]+)(?:>.*?|/>)", RegexOptions.Singleline); + return JsonToolCallParser.TryParseToolCalls(response, out _); } private static IEnumerable ReadLetters(int fallbackNow) diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_ModifyGoodwill.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_ModifyGoodwill.cs index d14824cb..c28675b8 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_ModifyGoodwill.cs +++ 
b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_ModifyGoodwill.cs @@ -8,32 +8,21 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "modify_goodwill"; public override string Description => "Adjusts YOUR internal opinion of the player (AI Goodwill). WARNING: This DOES NOT affect Faction Relations or stop raids. It is purely personal. Do NOT use this to try to stop enemies."; - public override string UsageSchema => "integer"; + public override string UsageSchema => "{\"amount\": 1}"; public override string Execute(string args) { try { - var parsedArgs = ParseXmlArgs(args); + var parsedArgs = ParseJsonArgs(args); int amount = 0; - if (parsedArgs.TryGetValue("amount", out string amountStr)) - { - if (!int.TryParse(amountStr, out amount)) - { - return $"Error: Invalid amount '{amountStr}'. Must be an integer."; - } - } - else + if (!TryGetInt(parsedArgs, "amount", out amount)) { // Fallback for simple number string - if (int.TryParse(args.Trim(), out int val)) + if (!int.TryParse(args?.Trim(), out amount)) { - amount = val; - } - else - { - return "Error: Missing parameter."; + return "Error: Missing 'amount' parameter."; } } @@ -60,4 +49,4 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } } } -} \ No newline at end of file +} diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_RecallMemories.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_RecallMemories.cs index 0536d710..f59c61a1 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_RecallMemories.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_RecallMemories.cs @@ -11,19 +11,13 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "recall_memories"; public override string Description => "Searches the AI's long-term memory for facts matching a specific query or keyword."; - public override string UsageSchema => "Search keywordsoptional_int_max_results"; + public override string UsageSchema => 
"{\"query\":\"keywords\",\"limit\":5}"; public override string Execute(string args) { - var argsDict = ParseXmlArgs(args); - string query = argsDict.TryGetValue("query", out string q) ? q : ""; - string limitStr = argsDict.TryGetValue("limit", out string lStr) ? lStr : "5"; - - int limit = 5; - if (int.TryParse(limitStr, out int parsedLimit)) - { - limit = parsedLimit; - } + var argsDict = ParseJsonArgs(args); + string query = TryGetString(argsDict, "query", out string q) ? q : ""; + int limit = TryGetInt(argsDict, "limit", out int parsedLimit) ? parsedLimit : 5; var memoryManager = Find.World?.GetComponent(); if (memoryManager == null) diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_RememberFact.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_RememberFact.cs index 08cf538d..bef5456a 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_RememberFact.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_RememberFact.cs @@ -9,17 +9,17 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "remember_fact"; public override string Description => "Stores a specific fact or piece of information into the AI's long-term memory for future retrieval."; - public override string UsageSchema => "Text content to rememberoptional_category"; + public override string UsageSchema => "{\"fact\":\"...\",\"category\":\"misc\"}"; public override string Execute(string args) { - var argsDict = ParseXmlArgs(args); - if (!argsDict.TryGetValue("fact", out string fact) || string.IsNullOrWhiteSpace(fact)) + var argsDict = ParseJsonArgs(args); + if (!TryGetString(argsDict, "fact", out string fact) || string.IsNullOrWhiteSpace(fact)) { - return "Error: content is required."; + return "Error: 'fact' content is required."; } - string category = argsDict.TryGetValue("category", out string cat) ? cat : "misc"; + string category = TryGetString(argsDict, "category", out string cat) ? 
cat : "misc"; var memoryManager = Find.World?.GetComponent(); if (memoryManager == null) diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SearchPawnKind.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SearchPawnKind.cs index be8d6d31..48399f41 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SearchPawnKind.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SearchPawnKind.cs @@ -10,18 +10,18 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "search_pawn_kind"; public override string Description => "Rough-searches PawnKindDefs by natural language (label/defName). Returns candidate defNames for send_reinforcement."; - public override string UsageSchema => "stringint (optional, default 10)float (optional, default 0.15)"; + public override string UsageSchema => "{\"query\":\"escort\",\"maxResults\":10,\"minScore\":0.15}"; public override string Execute(string args) { try { - var parsed = ParseXmlArgs(args); + var parsed = ParseJsonArgs(args); string query = null; - if (parsed.TryGetValue("query", out string q)) query = q; + if (TryGetString(parsed, "query", out string q)) query = q; if (string.IsNullOrWhiteSpace(query)) { - if (!string.IsNullOrWhiteSpace(args) && !args.Trim().StartsWith("<")) + if (!string.IsNullOrWhiteSpace(args) && !LooksLikeJson(args)) { query = args; } @@ -33,16 +33,10 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } int maxResults = 10; - if (parsed.TryGetValue("maxResults", out string maxStr) && int.TryParse(maxStr, out int mr)) - { - maxResults = Math.Max(1, Math.Min(50, mr)); - } + if (TryGetInt(parsed, "maxResults", out int mr)) maxResults = Math.Max(1, Math.Min(50, mr)); float minScore = 0.15f; - if (parsed.TryGetValue("minScore", out string minStr) && float.TryParse(minStr, out float ms)) - { - minScore = Math.Max(0.01f, Math.Min(1.0f, ms)); - } + if (TryGetFloat(parsed, "minScore", out float ms)) minScore = Math.Max(0.01f, Math.Min(1.0f, ms)); var candidates = 
PawnKindDefSearcher.Search(query, maxResults: maxResults, minScore: minScore); if (candidates.Count == 0) diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SearchThingDef.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SearchThingDef.cs index 01923709..43165b0f 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SearchThingDef.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SearchThingDef.cs @@ -11,18 +11,18 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "search_thing_def"; public override string Description => "Rough-searches RimWorld ThingDefs by natural language (label/defName). Returns candidate defNames so you can use them in other tools like spawn_resources."; - public override string UsageSchema => "stringint (optional, default 10)true/false (optional, default true)"; + public override string UsageSchema => "{\"query\":\"Steel\",\"maxResults\":10,\"itemsOnly\":true}"; public override string Execute(string args) { try { - var parsed = ParseXmlArgs(args); + var parsed = ParseJsonArgs(args); string query = null; - if (parsed.TryGetValue("query", out string q)) query = q; + if (TryGetString(parsed, "query", out string q)) query = q; if (string.IsNullOrWhiteSpace(query)) { - if (!string.IsNullOrWhiteSpace(args) && !args.Trim().StartsWith("<")) + if (!string.IsNullOrWhiteSpace(args) && !LooksLikeJson(args)) { query = args; } @@ -34,16 +34,10 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } int maxResults = 10; - if (parsed.TryGetValue("maxResults", out string maxStr) && int.TryParse(maxStr, out int mr)) - { - maxResults = Math.Max(1, Math.Min(50, mr)); - } + if (TryGetInt(parsed, "maxResults", out int mr)) maxResults = Math.Max(1, Math.Min(50, mr)); bool itemsOnly = true; - if (parsed.TryGetValue("itemsOnly", out string itemsOnlyStr) && bool.TryParse(itemsOnlyStr, out bool parsedItemsOnly)) - { - itemsOnly = parsedItemsOnly; - } + if (TryGetBool(parsed, "itemsOnly", out bool 
parsedItemsOnly)) itemsOnly = parsedItemsOnly; var candidates = ThingDefSearcher.Search(query, maxResults: maxResults, itemsOnly: itemsOnly, minScore: 0.15f); if (candidates.Count == 0) @@ -91,4 +85,3 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } } } - diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SendReinforcement.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SendReinforcement.cs index 89cb77b2..4efa3a7f 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SendReinforcement.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SendReinforcement.cs @@ -103,7 +103,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools return false; } - public override string UsageSchema => "string (e.g., 'Wula_PIA_Heavy_Unit_Melee: 2, Wula_PIA_Legion_Escort_Unit: 5')"; + public override string UsageSchema => "{\"units\": \"Wula_PIA_Heavy_Unit_Melee: 2, Wula_PIA_Legion_Escort_Unit: 5\"}"; public override string Execute(string args) { @@ -116,20 +116,16 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools if (faction == null) return "Error: Faction Wula_PIA_Legion_Faction not found."; // Parse args - var parsedArgs = ParseXmlArgs(args); + var parsedArgs = ParseJsonArgs(args); string unitString = ""; - if (parsedArgs.TryGetValue("units", out string units)) + if (TryGetString(parsedArgs, "units", out string units)) { unitString = units; } - else + else if (!LooksLikeJson(args)) { - // Fallback - if (!args.Trim().StartsWith("<")) - { - unitString = args; - } + unitString = args; } var unitPairs = unitString.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries); diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SetOverwatchMode.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SetOverwatchMode.cs index ea2faeff..15f955b7 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SetOverwatchMode.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SetOverwatchMode.cs @@ -10,23 +10,17 @@ namespace 
WulaFallenEmpire.EventSystem.AI.Tools { public override string Name => "set_overwatch_mode"; public override string Description => "Enables or disables the AI Overwatch Combat Protocol. When enabled (enabled=true), the AI will autonomously scan for hostile targets every few seconds and launch appropriate orbital bombardments for a set duration. When disabled (enabled=false), it immediately stops any active overwatch and clears the flight path. Use enabled=false to stop overwatch early if the player requests it."; - public override string UsageSchema => "true/falseamount (only needed when enabling)"; + public override string UsageSchema => "{\"enabled\":true,\"durationSeconds\":60}"; public override string Execute(string args) { - var parsed = ParseXmlArgs(args); + var parsed = ParseJsonArgs(args); bool enabled = true; - if (parsed.TryGetValue("enabled", out var enabledStr) && bool.TryParse(enabledStr, out bool e)) - { - enabled = e; - } + if (TryGetBool(parsed, "enabled", out bool e)) enabled = e; int duration = 60; - if (parsed.TryGetValue("durationSeconds", out var dStr) && int.TryParse(dStr, out int d)) - { - duration = d; - } + if (TryGetInt(parsed, "durationSeconds", out int d)) duration = d; Map map = Find.CurrentMap; if (map == null) return "Error: No active map."; diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SpawnResources.cs b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SpawnResources.cs index c6641475..7b59cfcf 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SpawnResources.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/Tools/Tool_SpawnResources.cs @@ -2,7 +2,6 @@ using System; using System.Collections.Generic; using System.Linq; using System.Text; -using System.Text.RegularExpressions; using RimWorld; using Verse; using WulaFallenEmpire.EventSystem.AI.Utils; @@ -18,8 +17,8 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools "If goodwill is low (< 0), give significantly less than asked or refuse. 
" + "If goodwill is high (> 50), you may give what is asked or slightly more. " + "Otherwise, give a moderate amount. " + - "TIP: Use the `search_thing_def` tool first and then spawn by DefName ( or put DefName into ) to avoid language mismatch."; - public override string UsageSchema => "Item NameInteger"; + "TIP: Use the `search_thing_def` tool first and then spawn by DefName to avoid language mismatch."; + public override string UsageSchema => "{\"items\":[{\"name\":\"Steel\",\"count\":100,\"stuffDefName\":\"Steel\"}]}"; public override string Execute(string args) { @@ -27,89 +26,44 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools { if (args == null) args = ""; - // Custom XML parsing for nested items + var parsedArgs = ParseJsonArgs(args); + var itemsToSpawn = new List<(ThingDef def, int count, string requestedName, string stuffDefName)>(); var substitutions = new List(); - - // Match all ... blocks - var itemMatches = Regex.Matches(args, @"]*>(.*?)", RegexOptions.Singleline | RegexOptions.IgnoreCase); - - foreach (Match match in itemMatches) + + if (TryGetList(parsedArgs, "items", out List itemsRaw)) { - string itemXml = match.Groups[1].Value; - - // Extract name (supports or for backward compatibility) - string ExtractTag(string xml, string tag) + foreach (var item in itemsRaw) { - var m = Regex.Match( - xml, - $@"<{tag}\b[^>]*>(?:|(.*?))", - RegexOptions.Singleline | RegexOptions.IgnoreCase); - if (!m.Success) return null; - string val = m.Groups[1].Success ? m.Groups[1].Value : m.Groups[2].Value; - return val?.Trim(); - } + if (item is not Dictionary itemDict) continue; - string name = ExtractTag(itemXml, "name") ?? ExtractTag(itemXml, "defName"); - string stuffDefName = ExtractTag(itemXml, "stuffDefName") ?? ExtractTag(itemXml, "stuff") ?? ExtractTag(itemXml, "material"); + string name = TryGetString(itemDict, "name", out string n) ? n : + (TryGetString(itemDict, "defName", out string dn) ? 
dn : null); - if (string.IsNullOrEmpty(name)) continue; + string stuffDefName = TryGetString(itemDict, "stuffDefName", out string sdn) ? sdn : + (TryGetString(itemDict, "stuff", out string s) ? s : + (TryGetString(itemDict, "material", out string m) ? m : null)); - // Extract count - string countStr = ExtractTag(itemXml, "count"); - if (string.IsNullOrEmpty(countStr)) continue; - if (!int.TryParse(countStr, out int count)) continue; - if (count <= 0) continue; + if (string.IsNullOrWhiteSpace(name)) continue; + if (!TryGetInt(itemDict, "count", out int count) || count <= 0) continue; - // Search for ThingDef - ThingDef def = null; - - // 1. Try exact defName match - def = DefDatabase.GetNamed(name.Trim(), false); - - // 2. Try exact label match (case-insensitive) - if (def == null) - { - foreach (var d in DefDatabase.AllDefs) - { - if (d.label != null && d.label.Equals(name.Trim(), StringComparison.OrdinalIgnoreCase)) - { - def = d; - break; - } - } - } - - // 3. Try fuzzy search (thresholded) - if (def == null) - { - var searchResult = ThingDefSearcher.ParseAndSearch(name); - if (searchResult.Count > 0) - { - def = searchResult[0].Def; - } - } - - // 4. Closest-match fallback: accept the best similar item even if not an exact match. - if (def == null) - { - ThingDefSearcher.TryFindBestThingDef(name, out ThingDef best, out float score, itemsOnly: true, minScore: 0.15f); - if (best != null && score >= 0.15f) - { - def = best; - substitutions.Add($"'{name}' -> '{best.label}' (score {score:F2})"); - } - } - - if (def != null) - { - itemsToSpawn.Add((def, count, name, stuffDefName)); + AddItem(name, count, stuffDefName, itemsToSpawn, substitutions); } } if (itemsToSpawn.Count == 0) { - // Fallback: allow natural language without blocks. + if (TryGetString(parsedArgs, "name", out string singleName) && TryGetInt(parsedArgs, "count", out int singleCount)) + { + string stuffDefName = TryGetString(parsedArgs, "stuffDefName", out string sdn) ? 
sdn : + (TryGetString(parsedArgs, "stuff", out string s) ? s : + (TryGetString(parsedArgs, "material", out string m) ? m : null)); + AddItem(singleName, singleCount, stuffDefName, itemsToSpawn, substitutions); + } + } + + if (itemsToSpawn.Count == 0 && !LooksLikeJson(args)) + { var parsed = ThingDefSearcher.ParseAndSearch(args); foreach (var r in parsed) { @@ -122,7 +76,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools if (itemsToSpawn.Count == 0) { - string msg = "Error: No valid items found in request. Usage: ......"; + string msg = "Error: No valid items found in request. Usage: {\"items\":[{\"name\":\"Steel\",\"count\":100}]}"; Messages.Message(msg, MessageTypeDefOf.RejectInput); return msg; } @@ -247,7 +201,7 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools if (thingsToDrop.Count > 0) { DropPodUtility.DropThingsNear(dropSpot, map, thingsToDrop); - + Faction faction = Find.FactionManager.FirstFactionOfDef(FactionDef.Named("Wula_PIA_Legion_Faction")); // Avoid unresolved named placeholders if the translation system doesn't pick up NamedArguments as expected. 
string template = "Wula_ResourceDrop".Translate(); @@ -294,6 +248,49 @@ namespace WulaFallenEmpire.EventSystem.AI.Tools } } + private static void AddItem(string name, int count, string stuffDefName, List<(ThingDef def, int count, string requestedName, string stuffDefName)> itemsToSpawn, List substitutions) + { + if (string.IsNullOrWhiteSpace(name) || count <= 0) return; + + ThingDef def = DefDatabase.GetNamed(name.Trim(), false); + + if (def == null) + { + foreach (var d in DefDatabase.AllDefs) + { + if (d.label != null && d.label.Equals(name.Trim(), StringComparison.OrdinalIgnoreCase)) + { + def = d; + break; + } + } + } + + if (def == null) + { + var searchResult = ThingDefSearcher.ParseAndSearch(name); + if (searchResult.Count > 0) + { + def = searchResult[0].Def; + } + } + + if (def == null) + { + ThingDefSearcher.TryFindBestThingDef(name, out ThingDef best, out float score, itemsOnly: true, minScore: 0.15f); + if (best != null && score >= 0.15f) + { + def = best; + substitutions.Add($"'{name}' -> '{best.label}' (score {score:F2})"); + } + } + + if (def != null) + { + itemsToSpawn.Add((def, count, name, stuffDefName)); + } + } + private static Map GetTargetMap() { Map map = Find.CurrentMap; diff --git a/Source/WulaFallenEmpire/EventSystem/AI/UI/Dialog_AIConversation.cs b/Source/WulaFallenEmpire/EventSystem/AI/UI/Dialog_AIConversation.cs index 4abe0615..88efb283 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/UI/Dialog_AIConversation.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/UI/Dialog_AIConversation.cs @@ -8,6 +8,7 @@ using RimWorld; using UnityEngine; using Verse; using WulaFallenEmpire.EventSystem.AI; +using WulaFallenEmpire.EventSystem.AI.Utils; using System.Text.RegularExpressions; namespace WulaFallenEmpire.EventSystem.AI.UI @@ -431,8 +432,14 @@ namespace WulaFallenEmpire.EventSystem.AI.UI { if (string.IsNullOrEmpty(rawResponse)) return ""; string text = rawResponse; - text = Regex.Replace(text, @"<([a-zA-Z0-9_]+)[^>]*>.*?", "", 
RegexOptions.Singleline); - text = Regex.Replace(text, @"<([a-zA-Z0-9_]+)[^>]*/>", ""); + if (JsonToolCallParser.TryParseToolCallsFromText(text, out _, out string fragment) && !string.IsNullOrWhiteSpace(fragment)) + { + int index = text.IndexOf(fragment, StringComparison.Ordinal); + if (index >= 0) + { + text = text.Remove(index, fragment.Length); + } + } text = ExpressionTagRegex.Replace(text, ""); text = text.Trim(); return text.Split(new[] { "OPTIONS:" }, StringSplitOptions.None)[0].Trim(); diff --git a/Source/WulaFallenEmpire/EventSystem/AI/UI/Overlay_WulaLink.cs b/Source/WulaFallenEmpire/EventSystem/AI/UI/Overlay_WulaLink.cs index 10bf970b..8fe6367a 100644 --- a/Source/WulaFallenEmpire/EventSystem/AI/UI/Overlay_WulaLink.cs +++ b/Source/WulaFallenEmpire/EventSystem/AI/UI/Overlay_WulaLink.cs @@ -5,6 +5,7 @@ using UnityEngine; using Verse; using RimWorld; using WulaFallenEmpire.EventSystem.AI; +using WulaFallenEmpire.EventSystem.AI.Utils; namespace WulaFallenEmpire.EventSystem.AI.UI { @@ -386,7 +387,7 @@ namespace WulaFallenEmpire.EventSystem.AI.UI string displayText = msg.message; if (msg.role == "assistant") { - displayText = StripXmlTags(msg.message)?.Trim() ?? ""; + displayText = StripToolCallJson(msg.message)?.Trim() ?? 
""; } else if (msg.role == "user") { @@ -476,14 +477,17 @@ namespace WulaFallenEmpire.EventSystem.AI.UI Widgets.EndScrollView(); } - private static string StripXmlTags(string text) + private static string StripToolCallJson(string text) { if (string.IsNullOrEmpty(text)) return text; - // Remove XML tags with content: content - string stripped = System.Text.RegularExpressions.Regex.Replace(text, @"<(?!/?(i|b|color|size|material)\b)([a-zA-Z0-9_]+)[^>]*>.*?", "", System.Text.RegularExpressions.RegexOptions.Singleline); - // Remove self-closing tags: - stripped = System.Text.RegularExpressions.Regex.Replace(stripped, @"<([a-zA-Z0-9_]+)[^>]*/?>", ""); - return stripped; + if (!JsonToolCallParser.TryParseToolCallsFromText(text, out _, out string fragment)) + { + return text; + } + + int index = text.IndexOf(fragment, StringComparison.Ordinal); + if (index < 0) return text; + return text.Remove(index, fragment.Length).Trim(); } private void DrawFooter(Rect rect) diff --git a/Source/WulaFallenEmpire/EventSystem/AI/Utils/JsonToolCallParser.cs b/Source/WulaFallenEmpire/EventSystem/AI/Utils/JsonToolCallParser.cs new file mode 100644 index 00000000..4f6737dc --- /dev/null +++ b/Source/WulaFallenEmpire/EventSystem/AI/Utils/JsonToolCallParser.cs @@ -0,0 +1,516 @@ +using System; +using System.Collections.Generic; +using System.Globalization; +using System.Text; + +namespace WulaFallenEmpire.EventSystem.AI.Utils +{ + public sealed class ToolCallInfo + { + public string Id; + public string Name; + public Dictionary Arguments; + public string ArgumentsJson; + } + + public static class JsonToolCallParser + { + public static bool TryParseToolCalls(string input, out List toolCalls) + { + toolCalls = null; + if (string.IsNullOrWhiteSpace(input)) return false; + + if (!TryParseValue(input, out object root)) return false; + if (root is not Dictionary obj) return false; + + if (!TryGetValue(obj, "tool_calls", out object callsObj)) return false; + if (callsObj is not List callsList) return 
false; + + var parsedCalls = new List(); + foreach (var entry in callsList) + { + if (entry is not Dictionary callObj) continue; + + string id = TryGetString(callObj, "id"); + string name = null; + object argsObj = null; + + if (TryGetValue(callObj, "function", out object fnObj) && fnObj is Dictionary fnDict) + { + name = TryGetString(fnDict, "name"); + TryGetValue(fnDict, "arguments", out argsObj); + } + else + { + name = TryGetString(callObj, "name"); + TryGetValue(callObj, "arguments", out argsObj); + } + + if (string.IsNullOrWhiteSpace(name)) continue; + + if (!TryNormalizeArguments(argsObj, out Dictionary args, out string argsJson)) + { + args = new Dictionary(StringComparer.OrdinalIgnoreCase); + argsJson = "{}"; + } + + parsedCalls.Add(new ToolCallInfo + { + Id = id, + Name = name.Trim(), + Arguments = args, + ArgumentsJson = argsJson + }); + } + + toolCalls = parsedCalls; + return true; + } + + public static bool TryParseToolCallsFromText(string input, out List toolCalls, out string jsonFragment) + { + toolCalls = null; + jsonFragment = null; + if (string.IsNullOrWhiteSpace(input)) return false; + + string trimmed = input.Trim(); + if (TryParseToolCalls(trimmed, out toolCalls)) + { + jsonFragment = trimmed; + return true; + } + + int firstBrace = trimmed.IndexOf('{'); + int lastBrace = trimmed.LastIndexOf('}'); + if (firstBrace >= 0 && lastBrace > firstBrace) + { + string candidate = trimmed.Substring(firstBrace, lastBrace - firstBrace + 1); + if (TryParseToolCalls(candidate, out toolCalls)) + { + jsonFragment = candidate; + return true; + } + } + + return false; + } + + public static bool TryParseObject(string json, out Dictionary obj) + { + obj = null; + if (string.IsNullOrWhiteSpace(json)) return false; + if (!TryParseValue(json, out object value)) return false; + if (value is not Dictionary dict) return false; + obj = dict; + return true; + } + + public static bool LooksLikeJson(string text) + { + if (string.IsNullOrWhiteSpace(text)) return false; + 
string trimmed = text.TrimStart(); + return trimmed.StartsWith("{", StringComparison.Ordinal) || trimmed.StartsWith("[", StringComparison.Ordinal); + } + + public static string SerializeToJson(object value) + { + var sb = new StringBuilder(); + AppendValue(sb, value); + return sb.ToString(); + } + + private static void AppendValue(StringBuilder sb, object value) + { + if (value == null) + { + sb.Append("null"); + return; + } + + if (value is string s) + { + sb.Append('\"').Append(EscapeJson(s)).Append('\"'); + return; + } + + if (value is bool b) + { + sb.Append(b ? "true" : "false"); + return; + } + + if (value is double d) + { + sb.Append(d.ToString("0.################", CultureInfo.InvariantCulture)); + return; + } + + if (value is float f) + { + sb.Append(f.ToString("0.################", CultureInfo.InvariantCulture)); + return; + } + + if (value is int or long or short or byte) + { + sb.Append(Convert.ToString(value, CultureInfo.InvariantCulture)); + return; + } + + if (value is Dictionary obj) + { + sb.Append('{'); + bool first = true; + foreach (var kvp in obj) + { + if (!first) sb.Append(','); + first = false; + sb.Append('\"').Append(EscapeJson(kvp.Key)).Append('\"').Append(':'); + AppendValue(sb, kvp.Value); + } + sb.Append('}'); + return; + } + + if (value is List list) + { + sb.Append('['); + for (int i = 0; i < list.Count; i++) + { + if (i > 0) sb.Append(','); + AppendValue(sb, list[i]); + } + sb.Append(']'); + return; + } + + sb.Append('\"').Append(EscapeJson(Convert.ToString(value, CultureInfo.InvariantCulture) ?? 
"")).Append('\"'); + } + + private static bool TryNormalizeArguments(object argsObj, out Dictionary args, out string argsJson) + { + args = null; + argsJson = null; + + if (argsObj == null) + { + args = new Dictionary(StringComparer.OrdinalIgnoreCase); + argsJson = "{}"; + return true; + } + + if (argsObj is Dictionary dict) + { + args = dict; + argsJson = SerializeToJson(dict); + return true; + } + + if (argsObj is string s) + { + if (string.IsNullOrWhiteSpace(s)) + { + args = new Dictionary(StringComparer.OrdinalIgnoreCase); + argsJson = "{}"; + return true; + } + + if (TryParseObject(s, out Dictionary parsed)) + { + args = parsed; + argsJson = SerializeToJson(parsed); + return true; + } + + return false; + } + + return false; + } + + private static bool TryParseValue(string json, out object value) + { + value = null; + var reader = new JsonReader(json); + if (!reader.TryReadValue(out value)) return false; + reader.SkipWhitespace(); + return reader.IsAtEnd; + } + + private static string EscapeJson(string value) + { + if (string.IsNullOrEmpty(value)) return ""; + var sb = new StringBuilder(); + foreach (char c in value) + { + switch (c) + { + case '\\': sb.Append("\\\\"); break; + case '\"': sb.Append("\\\""); break; + case '\n': sb.Append("\\n"); break; + case '\r': sb.Append("\\r"); break; + case '\t': sb.Append("\\t"); break; + default: + if (c < 0x20) + { + sb.Append("\\u").Append(((int)c).ToString("x4", CultureInfo.InvariantCulture)); + } + else + { + sb.Append(c); + } + break; + } + } + return sb.ToString(); + } + + private static bool TryGetValue(Dictionary obj, string key, out object value) + { + if (obj == null) + { + value = null; + return false; + } + foreach (var kvp in obj) + { + if (string.Equals(kvp.Key, key, StringComparison.OrdinalIgnoreCase)) + { + value = kvp.Value; + return true; + } + } + value = null; + return false; + } + + private static string TryGetString(Dictionary obj, string key) + { + if (TryGetValue(obj, key, out object value) && 
value != null) + { + return Convert.ToString(value, CultureInfo.InvariantCulture); + } + return null; + } + + private sealed class JsonReader + { + private readonly string _text; + private int _index; + + public JsonReader(string text) + { + _text = text ?? ""; + _index = 0; + } + + public bool IsAtEnd => _index >= _text.Length; + + public void SkipWhitespace() + { + while (_index < _text.Length && char.IsWhiteSpace(_text[_index])) + { + _index++; + } + } + + public bool TryReadValue(out object value) + { + value = null; + SkipWhitespace(); + if (IsAtEnd) return false; + + char c = _text[_index]; + if (c == '{') return TryReadObject(out value); + if (c == '[') return TryReadArray(out value); + if (c == '\"') return TryReadString(out value); + if (c == '-' || char.IsDigit(c)) return TryReadNumber(out value); + if (TryReadLiteral("true")) { value = true; return true; } + if (TryReadLiteral("false")) { value = false; return true; } + if (TryReadLiteral("null")) { value = null; return true; } + return false; + } + + private bool TryReadObject(out object value) + { + value = null; + if (!TryReadChar('{')) return false; + SkipWhitespace(); + + var dict = new Dictionary(StringComparer.OrdinalIgnoreCase); + if (TryReadChar('}')) + { + value = dict; + return true; + } + + while (true) + { + SkipWhitespace(); + if (!TryReadString(out object keyObj)) return false; + string key = keyObj as string ?? 
""; + SkipWhitespace(); + if (!TryReadChar(':')) return false; + if (!TryReadValue(out object itemValue)) return false; + dict[key] = itemValue; + SkipWhitespace(); + if (TryReadChar('}')) + { + value = dict; + return true; + } + if (!TryReadChar(',')) return false; + } + } + + private bool TryReadArray(out object value) + { + value = null; + if (!TryReadChar('[')) return false; + SkipWhitespace(); + + var list = new List(); + if (TryReadChar(']')) + { + value = list; + return true; + } + + while (true) + { + if (!TryReadValue(out object item)) return false; + list.Add(item); + SkipWhitespace(); + if (TryReadChar(']')) + { + value = list; + return true; + } + if (!TryReadChar(',')) return false; + } + } + + private bool TryReadString(out object value) + { + value = null; + if (!TryReadChar('\"')) return false; + var sb = new StringBuilder(); + while (_index < _text.Length) + { + char c = _text[_index++]; + if (c == '\"') + { + value = sb.ToString(); + return true; + } + if (c == '\\') + { + if (_index >= _text.Length) return false; + char esc = _text[_index++]; + switch (esc) + { + case '\"': sb.Append('\"'); break; + case '\\': sb.Append('\\'); break; + case '/': sb.Append('/'); break; + case 'b': sb.Append('\b'); break; + case 'f': sb.Append('\f'); break; + case 'n': sb.Append('\n'); break; + case 'r': sb.Append('\r'); break; + case 't': sb.Append('\t'); break; + case 'u': + if (_index + 4 > _text.Length) return false; + string hex = _text.Substring(_index, 4); + if (!int.TryParse(hex, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int code)) + { + return false; + } + sb.Append((char)code); + _index += 4; + break; + default: + return false; + } + } + else + { + sb.Append(c); + } + } + return false; + } + + private bool TryReadNumber(out object value) + { + value = null; + int start = _index; + if (_text[_index] == '-') _index++; + + while (_index < _text.Length && char.IsDigit(_text[_index])) + { + _index++; + } + + bool hasDot = false; + if (_index < 
_text.Length && _text[_index] == '.') + { + hasDot = true; + _index++; + while (_index < _text.Length && char.IsDigit(_text[_index])) + { + _index++; + } + } + + if (_index < _text.Length && (_text[_index] == 'e' || _text[_index] == 'E')) + { + hasDot = true; + _index++; + if (_index < _text.Length && (_text[_index] == '+' || _text[_index] == '-')) + { + _index++; + } + while (_index < _text.Length && char.IsDigit(_text[_index])) + { + _index++; + } + } + + string number = _text.Substring(start, _index - start); + if (!hasDot && long.TryParse(number, NumberStyles.Integer, CultureInfo.InvariantCulture, out long longVal)) + { + value = longVal; + return true; + } + + if (double.TryParse(number, NumberStyles.Float, CultureInfo.InvariantCulture, out double dbl)) + { + value = dbl; + return true; + } + + return false; + } + + private bool TryReadLiteral(string literal) + { + SkipWhitespace(); + if (_text.Length - _index < literal.Length) return false; + if (string.Compare(_text, _index, literal, 0, literal.Length, StringComparison.OrdinalIgnoreCase) != 0) + { + return false; + } + _index += literal.Length; + return true; + } + + private bool TryReadChar(char expected) + { + SkipWhitespace(); + if (_index >= _text.Length) return false; + if (_text[_index] != expected) return false; + _index++; + return true; + } + } + } +} diff --git a/Source/WulaFallenEmpire/WulaAI_DevDocs/WulaAI_Gemini_Integration_Handover.md b/Source/WulaFallenEmpire/WulaAI_DevDocs/WulaAI_Gemini_Integration_Handover.md deleted file mode 100644 index 4f41eed2..00000000 --- a/Source/WulaFallenEmpire/WulaAI_DevDocs/WulaAI_Gemini_Integration_Handover.md +++ /dev/null @@ -1,83 +0,0 @@ -# Wula AI x Gemini Integration: Technical Handover Document - -**Version**: 1.0 -**Date**: 2025-12-28 -**Author**: AntiGravity (Agent) -**Target Audience**: Codex / Future Maintainers - ---- - -## 1. 
Overview -This document details the specific challenges, bugs, and architectural decisions made to stabilize the integration between **WulaFallenEmpire** (RimWorld Mod) and **Gemini 3 / OpenAI-Compatible Agents**. It specifically addresses "stubborn" issues related to API format compliance, JSON construction, and multimodal context persistence. - ---- - -## 2. Critical Issues & Fixes - -### 2.1 The "Streaming" Trap (SSE Handling) -**Symptoms**: AI responses were truncated (e.g., only "Comman" displayed instead of "Commander"). -**Root Cause**: Even when `stream: false` is explicitly requested in the payload, some API providers (or reverse proxies wrapping Gemini) force a **Server-Sent Events (SSE)** response format (`data: {...}`). The original client only parsed the first line. -**Fix Implementation**: -- **File**: `SimpleAIClient.cs` -> `ExtractContent` -- **Logic**: Inspects response for `data:` prefix. If found, it iterates through **ALL** lines, strips `data:`, parses individual JSON chunks, and aggregates the `choices[0].delta.content` into a single string. -- **Defense**: This ensures compatibility with both standard JSON responses and forced Stream responses. - -### 2.2 The "Trailing Comma" Crash (HTTP 400) -**Symptoms**: AI actions failed silently or returned `400 Bad Request`. -**Root Cause**: In `SimpleAIClient.cs`, the JSON payload construction loop had a logic flaw. -- When filtering out `toolcall` roles inside the loop, the index `i` check `(i < messages.Count - 1)` failed to account for skipped items, leaving a trailing comma after the last valid item: `[{"role":"user",...},]` -> **Invalid JSON**. -- Additionally, if the message list was empty (or all items filtered), the comma after the System Message remained: `[{"role":"system",...},]` -> **Invalid JSON**. -**Fix Implementation**: -- **Logic**: - 1. Pre-filter `validMessages` into a separate list **before** JSON construction. - 2. 
Only append the comma after the System Message `if (validMessages.Count > 0)`. - 3. Iterate `validMessages` to guarantee correct comma placement between items. - -### 2.3 Gemini 3's "JSON Obsession" & The Dual-Defense Strategy -**Symptoms**: Gemini 3 Flash Preview ignores System Prompts demanding XML (``) and persistently outputs JSON (`[{"action":"click"...}]`). -**Root Cause**: RLHF tuning of newer models biases them heavily towards standard JSON tool-calling schemas, overriding prompt constraints. -**Strategy**: **"Principled Compromise"** (Double Defense). -1. **Layer 1 (Prompt)**: Explicitly list JSON and Markdown as `INVALID EXAMPLES` in `AIIntelligenceCore.cs`. This discourages compliance-oriented models from using them. -2. **Layer 2 (Code Fallback)**: If XML regex fails, the system attempts to parse **Markdown JSON Blocks** (` ```json ... ``` `). - - **File**: `AIIntelligenceCore.cs` -> `ExecuteXmlToolsForPhase` - - **Logic**: Extracts `point` arrays `[x, y]` and synthesizes a valid `` XML tag internally. - -### 2.4 The Coordinate System Mess -**Symptoms**: Clicks occurred off-screen or at (0,0). -**Root Cause**: -- Gemini 3 often returns coordinates in a **0-1000** scale (e.g., `[115, 982]`). -- Previous logic used `Screen.width` normalization, which is **not thread-safe** and caused crashes or incorrect scaling if the assumption was pixel coordinates. -**Fix Implementation**: -- **Logic**: In the JSON Fallback parser, if `x > 1` or `y > 1`, divide by **1000.0f**. This standardizes coordinates to the mod's required 0-1 proportional format. - -### 2.5 Visual Context Persistence (The "Blind Reply" Bug) -**Symptoms**: AI acted correctly (Phase 2) but "forgot" what it saw when replying to the user (Phase 3), or hallucinated headers. -**Root Cause**: -- Phase 3 (Reply) sends a message history ending with System Tool Results. -- `SimpleAIClient` only attached the image if the **very last message** was from `user`. 
-- Thus, in Phase 3, the image was dropped, rendering the AI blind. -**Fix Implementation**: -- **File**: `SimpleAIClient.cs` -- **Logic**: Instead of checking the last index, the code now searches **backwards** for the `lastUserIndex`. The image is attached to that specific user message, regardless of how many system messages follow it. - ---- - -## 3. Future Maintenance Guide - -### If Gemini 4 Breaks Format Again: -1. **Check `SimpleAIClient.cs`**: Ensure the JSON parser handles whatever new wrapper they add (e.g., nested `candidates`). -2. **Check `AIIntelligenceCore.cs`**: If it invents a new tool format (e.g., YAML), add a regex parser in `ExecuteXmlToolsForPhase` similar to the JSON Fallback. **Do not fight the model; adapt to it.** - -### If API Errors Return: -1. Enable `DevMode` in RimWorld. -2. Check `Player.log` for `[WulaAI] Request Payload`. -3. Copy the payload to a JSON Validator. **Look for trailing commas.** - -### Adding New Visual Tools: -1. Define tool in `Tools/`. -2. Update `GetToolSystemInstruction` whitelist. -3. **Crucially**: If the tool helps with **Action** (Silent), ensure `GetPhaseInstruction` enforces silence. If it helps with **Reply** (Descriptive), ensure it runs in Phase 3. - ---- - -**End of Handover.** diff --git a/Source/WulaFallenEmpire/WulaAI_DevDocs/deepseek/JsonOutput.md b/Source/WulaFallenEmpire/WulaAI_DevDocs/deepseek/JsonOutput.md new file mode 100644 index 00000000..8376122a --- /dev/null +++ b/Source/WulaFallenEmpire/WulaAI_DevDocs/deepseek/JsonOutput.md @@ -0,0 +1,58 @@ +JSON Output +In many scenarios, users need the model to output in strict JSON format to achieve structured output, facilitating subsequent parsing. + +DeepSeek provides JSON Output to ensure the model outputs valid JSON strings. + +Notice +To enable JSON Output, users should: + +Set the response_format parameter to {'type': 'json_object'}. 
+Include the word "json" in the system or user prompt, and provide an example of the desired JSON format to guide the model in outputting valid JSON. +Set the max_tokens parameter reasonably to prevent the JSON string from being truncated midway. +When using the JSON Output feature, the API may occasionally return empty content. We are actively working on optimizing this issue. You can try modifying the prompt to mitigate such problems. +Sample Code +Here is the complete Python code demonstrating the use of JSON Output: + +import json +from openai import OpenAI + +client = OpenAI( + api_key="", + base_url="https://api.deepseek.com", +) + +system_prompt = """ +The user will provide some exam text. Please parse the "question" and "answer" and output them in JSON format. + +EXAMPLE INPUT: +Which is the highest mountain in the world? Mount Everest. + +EXAMPLE JSON OUTPUT: +{ + "question": "Which is the highest mountain in the world?", + "answer": "Mount Everest" +} +""" + +user_prompt = "Which is the longest river in the world? The Nile River." + +messages = [{"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}] + +response = client.chat.completions.create( + model="deepseek-chat", + messages=messages, + response_format={ + 'type': 'json_object' + } +) + +print(json.loads(response.choices[0].message.content)) + + +The model will output: + +{ + "question": "Which is the longest river in the world?", + "answer": "The Nile River" +} \ No newline at end of file diff --git a/Source/WulaFallenEmpire/WulaAI_DevDocs/deepseek/ToolCalls.md b/Source/WulaFallenEmpire/WulaAI_DevDocs/deepseek/ToolCalls.md new file mode 100644 index 00000000..929ce39b --- /dev/null +++ b/Source/WulaFallenEmpire/WulaAI_DevDocs/deepseek/ToolCalls.md @@ -0,0 +1,273 @@ +Tool Calls +Tool Calls allows the model to call external tools to enhance its capabilities. 
+ +Non-thinking Mode +Sample Code +Here is an example of using Tool Calls to get the current weather information of the user's location, demonstrated with complete Python code. + +For the specific API format of Tool Calls, please refer to the Chat Completion documentation. + +from openai import OpenAI + +def send_messages(messages): + response = client.chat.completions.create( + model="deepseek-chat", + messages=messages, + tools=tools + ) + return response.choices[0].message + +client = OpenAI( + api_key="", + base_url="https://api.deepseek.com", +) + +tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather of a location, the user should supply a location first.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA", + } + }, + "required": ["location"] + }, + } + }, +] + +messages = [{"role": "user", "content": "How's the weather in Hangzhou, Zhejiang?"}] +message = send_messages(messages) +print(f"User>\t {messages[0]['content']}") + +tool = message.tool_calls[0] +messages.append(message) + +messages.append({"role": "tool", "tool_call_id": tool.id, "content": "24℃"}) +message = send_messages(messages) +print(f"Model>\t {message.content}") + +The execution flow of this example is as follows: + +User: Asks about the current weather in Hangzhou +Model: Returns the function get_weather({location: 'Hangzhou'}) +User: Calls the function get_weather({location: 'Hangzhou'}) and provides the result to the model +Model: Returns in natural language, "The current temperature in Hangzhou is 24°C." +Note: In the above code, the functionality of the get_weather function needs to be provided by the user. The model itself does not execute specific functions. + +Thinking Mode +From DeepSeek-V3.2, the API supports tool use in the thinking mode. 
For more details, please refer to Thinking Mode + +strict Mode (Beta) +In strict mode, the model strictly adheres to the format requirements of the Function's JSON schema when outputting a tool call, ensuring that the model's output complies with the user's definition. It is supported by both thinking and non-thinking mode. + +To use strict mode, you need to: + +Use base_url="https://api.deepseek.com/beta" to enable Beta features +In the tools parameter, all functions need to set the strict property to true +The server will validate the JSON Schema of the Function provided by the user. If the schema does not conform to the specifications or contains JSON schema types that are not supported by the server, an error message will be returned +The following is an example of a tool definition in the strict mode: + +{ + "type": "function", + "function": { + "name": "get_weather", + "strict": true, + "description": "Get weather of a location, the user should supply a location first.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. San Francisco, CA" + } + }, + "required": ["location"], + "additionalProperties": false + } + } +} + +Support Json Schema Types In strict Mode +object +string +number +integer +boolean +array +enum +anyOf +object +The object defines a nested structure containing key-value pairs, where properties specifies the schema for each key (or property) within the object. All properties of every object must be set as required, and the additionalProperties attribute of the object must be set to false. + +Example: + +{ + "type": "object", + "properties": { + "name": { "type": "string" }, + "age": { "type": "integer" } + }, + "required": ["name", "age"], + "additionalProperties": false +} + +string +Supported parameters: + +pattern: Uses regular expressions to constrain the format of the string +format: Validates the string against predefined common formats.
Currently supported formats: +email: Email address +hostname: Hostname +ipv4: IPv4 address +ipv6: IPv6 address +uuid: UUID +Unsupported parameters: + +minLength +maxLength +Example: + +{ + "type": "object", + "properties": { + "user_email": { + "type": "string", + "description": "The user's email address", + "format": "email" + }, + "zip_code": { + "type": "string", + "description": "Six digit postal code", + "pattern": "^\\d{6}$" + } + } +} + +number/integer +Supported parameters: +const: Specifies a constant numeric value +default: Defines the default value of the number +minimum: Specifies the minimum value +maximum: Specifies the maximum value +exclusiveMinimum: Defines a value that the number must be greater than +exclusiveMaximum: Defines a value that the number must be less than +multipleOf: Ensures that the number is a multiple of the specified value +Example: + +{ + "type": "object", + "properties": { + "score": { + "type": "integer", + "description": "A number from 1-5, which represents your rating, the higher, the better", + "minimum": 1, + "maximum": 5 + } + }, + "required": ["score"], + "additionalProperties": false +} + +array +Unsupported parameters: +minItems +maxItems +Example: + +{ + "type": "object", + "properties": { + "keywords": { + "type": "array", + "description": "Five keywords of the article, sorted by importance", + "items": { + "type": "string", + "description": "A concise and accurate keyword or phrase." + } + } + }, + "required": ["keywords"], + "additionalProperties": false +} + +enum +The enum ensures that the output is one of the predefined options. For example, in the case of order status, it can only be one of a limited set of specified states. 
+ +Example: + +{ + "type": "object", + "properties": { + "order_status": { + "type": "string", + "description": "Ordering status", + "enum": ["pending", "processing", "shipped", "cancelled"] + } + } +} + +anyOf +Matches any one of the provided schemas, allowing fields to accommodate multiple valid formats. For example, a user's account could be either an email address or a phone number: + +{ + "type": "object", + "properties": { + "account": { + "anyOf": [ + { "type": "string", "format": "email", "description": "可以是电子邮件地址" }, + { "type": "string", "pattern": "^\\d{11}$", "description": "或11位手机号码" } + ] + } + } +} + +$ref and $def +You can use $def to define reusable modules and then use $ref to reference them, reducing schema repetition and enabling modularization. Additionally, $ref can be used independently to define recursive structures. + +{ + "type": "object", + "properties": { + "report_date": { + "type": "string", + "description": "The date when the report was published" + }, + "authors": { + "type": "array", + "description": "The authors of the report", + "items": { + "$ref": "#/$def/authors" + } + } + }, + "required": ["report_date", "authors"], + "additionalProperties": false, + "$def": { + "authors": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "author's name" + }, + "institution": { + "type": "string", + "description": "author's institution" + }, + "email": { + "type": "string", + "format": "email", + "description": "author's email" + } + }, + "additionalProperties": false, + "required": ["name", "institution", "email"] + } + } +} \ No newline at end of file diff --git a/Source/WulaFallenEmpire/WulaAI_DevDocs/google/Function-calling-with-the-Gemini-API.md b/Source/WulaFallenEmpire/WulaAI_DevDocs/google/Function-calling-with-the-Gemini-API.md new file mode 100644 index 00000000..eb33a8b8 --- /dev/null +++ b/Source/WulaFallenEmpire/WulaAI_DevDocs/google/Function-calling-with-the-Gemini-API.md @@ -0,0 +1,1511 @@ +
+ +Function calling lets you connect models to external tools and APIs. Instead of generating text responses, the model determines when to call specific functions and provides the necessary parameters to execute real-world actions. This allows the model to act as a bridge between natural language and real-world actions and data. Function calling has 3 primary use cases: + +- **Augment Knowledge:**Access information from external sources like databases, APIs, and knowledge bases. +- **Extend Capabilities:**Use external tools to perform computations and extend the limitations of the model, such as using a calculator or creating charts. +- **Take Actions:**Interact with external systems using APIs, such as scheduling appointments, creating invoices, sending emails, or controlling smart home devices. + +Get WeatherSchedule MeetingCreate Chart + +### Python + + from google import genai + from google.genai import types + + # Define the function declaration for the model + schedule_meeting_function = { + "name": "schedule_meeting", + "description": "Schedules a meeting with specified attendees at a given time and date.", + "parameters": { + "type": "object", + "properties": { + "attendees": { + "type": "array", + "items": {"type": "string"}, + "description": "List of people attending the meeting.", + }, + "date": { + "type": "string", + "description": "Date of the meeting (e.g., '2024-07-29')", + }, + "time": { + "type": "string", + "description": "Time of the meeting (e.g., '15:00')", + }, + "topic": { + "type": "string", + "description": "The subject or topic of the meeting.", + }, + }, + "required": ["attendees", "date", "time", "topic"], + }, + } + + # Configure the client and tools + client = genai.Client() + tools = types.Tool(function_declarations=[schedule_meeting_function]) + config = types.GenerateContentConfig(tools=[tools]) + + # Send request with function declarations + response = client.models.generate_content( + model="gemini-2.5-flash", + 
contents="Schedule a meeting with Bob and Alice for 03/14/2025 at 10:00 AM about the Q3 planning.", + config=config, + ) + + # Check for a function call + if response.candidates[0].content.parts[0].function_call: + function_call = response.candidates[0].content.parts[0].function_call + print(f"Function to call: {function_call.name}") + print(f"Arguments: {function_call.args}") + # In a real app, you would call your function here: + # result = schedule_meeting(**function_call.args) + else: + print("No function call found in the response.") + print(response.text) + +### JavaScript + + import { GoogleGenAI, Type } from '@google/genai'; + + // Configure the client + const ai = new GoogleGenAI({}); + + // Define the function declaration for the model + const scheduleMeetingFunctionDeclaration = { + name: 'schedule_meeting', + description: 'Schedules a meeting with specified attendees at a given time and date.', + parameters: { + type: Type.OBJECT, + properties: { + attendees: { + type: Type.ARRAY, + items: { type: Type.STRING }, + description: 'List of people attending the meeting.', + }, + date: { + type: Type.STRING, + description: 'Date of the meeting (e.g., "2024-07-29")', + }, + time: { + type: Type.STRING, + description: 'Time of the meeting (e.g., "15:00")', + }, + topic: { + type: Type.STRING, + description: 'The subject or topic of the meeting.', + }, + }, + required: ['attendees', 'date', 'time', 'topic'], + }, + }; + + // Send request with function declarations + const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: 'Schedule a meeting with Bob and Alice for 03/27/2025 at 10:00 AM about the Q3 planning.', + config: { + tools: [{ + functionDeclarations: [scheduleMeetingFunctionDeclaration] + }], + }, + }); + + // Check for function calls in the response + if (response.functionCalls && response.functionCalls.length > 0) { + const functionCall = response.functionCalls[0]; // Assuming one function call + console.log(`Function 
to call: ${functionCall.name}`); + console.log(`Arguments: ${JSON.stringify(functionCall.args)}`); + // In a real app, you would call your actual function here: + // const result = await scheduleMeeting(functionCall.args); + } else { + console.log("No function call found in the response."); + console.log(response.text); + } + +### REST + + curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent" \ + -H "x-goog-api-key: $GEMINI_API_KEY" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [ + { + "role": "user", + "parts": [ + { + "text": "Schedule a meeting with Bob and Alice for 03/27/2025 at 10:00 AM about the Q3 planning." + } + ] + } + ], + "tools": [ + { + "functionDeclarations": [ + { + "name": "schedule_meeting", + "description": "Schedules a meeting with specified attendees at a given time and date.", + "parameters": { + "type": "object", + "properties": { + "attendees": { + "type": "array", + "items": {"type": "string"}, + "description": "List of people attending the meeting." + }, + "date": { + "type": "string", + "description": "Date of the meeting (e.g., '2024-07-29')" + }, + "time": { + "type": "string", + "description": "Time of the meeting (e.g., '15:00')" + }, + "topic": { + "type": "string", + "description": "The subject or topic of the meeting." + } + }, + "required": ["attendees", "date", "time", "topic"] + } + } + ] + } + ] + }' + +## How function calling works + +![function calling overview](https://ai.google.dev/static/gemini-api/docs/images/function-calling-overview.png) + +Function calling involves a structured interaction between your application, the model, and external functions. Here's a breakdown of the process: + +1. **Define Function Declaration:**Define the function declaration in your application code. Function Declarations describe the function's name, parameters, and purpose to the model. +2. 
**Call LLM with function declarations:**Send user prompt along with the function declaration(s) to the model. It analyzes the request and determines if a function call would be helpful. If so, it responds with a structured JSON object. +3. **Execute Function Code (Your Responsibility):** The Model*does not* execute the function itself. It's your application's responsibility to process the response and check for Function Call, if + - **Yes**: Extract the name and args of the function and execute the corresponding function in your application. + - **No:**The model has provided a direct text response to the prompt (this flow is less emphasized in the example but is a possible outcome). +4. **Create User friendly response:**If a function was executed, capture the result and send it back to the model in a subsequent turn of the conversation. It will use the result to generate a final, user-friendly response that incorporates the information from the function call. + +This process can be repeated over multiple turns, allowing for complex interactions and workflows. The model also supports calling multiple functions in a single turn ([parallel function calling](https://ai.google.dev/gemini-api/docs/function-calling#parallel_function_calling)) and in sequence ([compositional function calling](https://ai.google.dev/gemini-api/docs/function-calling#compositional_function_calling)). + +### Step 1: Define a function declaration + +Define a function and its declaration within your application code that allows users to set light values and make an API request. This function could call external services or APIs. + +### Python + + # Define a function that the model can call to control smart lights + set_light_values_declaration = { + "name": "set_light_values", + "description": "Sets the brightness and color temperature of a light.", + "parameters": { + "type": "object", + "properties": { + "brightness": { + "type": "integer", + "description": "Light level from 0 to 100. 
Zero is off and 100 is full brightness", + }, + "color_temp": { + "type": "string", + "enum": ["daylight", "cool", "warm"], + "description": "Color temperature of the light fixture, which can be `daylight`, `cool` or `warm`.", + }, + }, + "required": ["brightness", "color_temp"], + }, + } + + # This is the actual function that would be called based on the model's suggestion + def set_light_values(brightness: int, color_temp: str) -> dict[str, int | str]: + """Set the brightness and color temperature of a room light. (mock API). + + Args: + brightness: Light level from 0 to 100. Zero is off and 100 is full brightness + color_temp: Color temperature of the light fixture, which can be `daylight`, `cool` or `warm`. + + Returns: + A dictionary containing the set brightness and color temperature. + """ + return {"brightness": brightness, "colorTemperature": color_temp} + +### JavaScript + + import { Type } from '@google/genai'; + + // Define a function that the model can call to control smart lights + const setLightValuesFunctionDeclaration = { + name: 'set_light_values', + description: 'Sets the brightness and color temperature of a light.', + parameters: { + type: Type.OBJECT, + properties: { + brightness: { + type: Type.NUMBER, + description: 'Light level from 0 to 100. Zero is off and 100 is full brightness', + }, + color_temp: { + type: Type.STRING, + enum: ['daylight', 'cool', 'warm'], + description: 'Color temperature of the light fixture, which can be `daylight`, `cool` or `warm`.', + }, + }, + required: ['brightness', 'color_temp'], + }, + }; + + /** + + * Set the brightness and color temperature of a room light. (mock API) + * @param {number} brightness - Light level from 0 to 100. Zero is off and 100 is full brightness + * @param {string} color_temp - Color temperature of the light fixture, which can be `daylight`, `cool` or `warm`. + * @return {Object} A dictionary containing the set brightness and color temperature. 
+ */ + function setLightValues(brightness, color_temp) { + return { + brightness: brightness, + colorTemperature: color_temp + }; + } + +### Step 2: Call the model with function declarations + +Once you have defined your function declarations, you can prompt the model to use them. It analyzes the prompt and function declarations and decides whether to respond directly or to call a function. If a function is called, the response object will contain a function call suggestion. + +### Python + + from google.genai import types + + # Configure the client and tools + client = genai.Client() + tools = types.Tool(function_declarations=[set_light_values_declaration]) + config = types.GenerateContentConfig(tools=[tools]) + + # Define user prompt + contents = [ + types.Content( + role="user", parts=[types.Part(text="Turn the lights down to a romantic level")] + ) + ] + + # Send request with function declarations + response = client.models.generate_content( + model="gemini-2.5-flash", + contents=contents, + config=config, + ) + + print(response.candidates[0].content.parts[0].function_call) + +### JavaScript + + import { GoogleGenAI } from '@google/genai'; + + // Generation config with function declaration + const config = { + tools: [{ + functionDeclarations: [setLightValuesFunctionDeclaration] + }] + }; + + // Configure the client + const ai = new GoogleGenAI({}); + + // Define user prompt + const contents = [ + { + role: 'user', + parts: [{ text: 'Turn the lights down to a romantic level' }] + } + ]; + + // Send request with function declarations + const response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: contents, + config: config + }); + + console.log(response.functionCalls[0]); + +The model then returns a `functionCall` object in an OpenAPI compatible schema specifying how to call one or more of the declared functions in order to respond to the user's question.
+ +### Python + + id=None args={'color_temp': 'warm', 'brightness': 25} name='set_light_values' + +### JavaScript + + { + name: 'set_light_values', + args: { brightness: 25, color_temp: 'warm' } + } + +### Step 3: Execute set_light_values function code + +Extract the function call details from the model's response, parse the arguments , and execute the`set_light_values`function. + +### Python + + # Extract tool call details, it may not be in the first part. + tool_call = response.candidates[0].content.parts[0].function_call + + if tool_call.name == "set_light_values": + result = set_light_values(**tool_call.args) + print(f"Function execution result: {result}") + +### JavaScript + + // Extract tool call details + const tool_call = response.functionCalls[0] + + let result; + if (tool_call.name === 'set_light_values') { + result = setLightValues(tool_call.args.brightness, tool_call.args.color_temp); + console.log(`Function execution result: ${JSON.stringify(result)}`); + } + +### Step 4: Create user friendly response with function result and call the model again + +Finally, send the result of the function execution back to the model so it can incorporate this information into its final response to the user. + +### Python + + from google import genai + from google.genai import types + + # Create a function response part + function_response_part = types.Part.from_function_response( + name=tool_call.name, + response={"result": result}, + ) + + # Append function call and result of the function execution to contents + contents.append(response.candidates[0].content) # Append the content from the model's response. 
+ contents.append(types.Content(role="user", parts=[function_response_part])) # Append the function response + + client = genai.Client() + final_response = client.models.generate_content( + model="gemini-2.5-flash", + config=config, + contents=contents, + ) + + print(final_response.text) + +### JavaScript + + // Create a function response part + const function_response_part = { + name: tool_call.name, + response: { result } + } + + // Append function call and result of the function execution to contents + contents.push(response.candidates[0].content); + contents.push({ role: 'user', parts: [{ functionResponse: function_response_part }] }); + + // Get the final response from the model + const final_response = await ai.models.generateContent({ + model: 'gemini-2.5-flash', + contents: contents, + config: config + }); + + console.log(final_response.text); + +This completes the function calling flow. The model successfully used the`set_light_values`function to perform the request action of the user. + +## Function declarations + +When you implement function calling in a prompt, you create a`tools`object, which contains one or more`function declarations`. You define functions using JSON, specifically with a[select subset](https://ai.google.dev/api/caching#Schema)of the[OpenAPI schema](https://spec.openapis.org/oas/v3.0.3#schemaw)format. A single function declaration can include the following parameters: + +- `name`(string): A unique name for the function (`get_weather_forecast`,`send_email`). Use descriptive names without spaces or special characters (use underscores or camelCase). +- `description`(string): A clear and detailed explanation of the function's purpose and capabilities. This is crucial for the model to understand when to use the function. Be specific and provide examples if helpful ("Finds theaters based on location and optionally movie title which is currently playing in theaters."). 
+- `parameters`(object): Defines the input parameters the function expects. + - `type`(string): Specifies the overall data type, such as`object`. + - `properties`(object): Lists individual parameters, each with: + - `type`(string): The data type of the parameter, such as`string`,`integer`,`boolean, array`. + - `description`(string): A description of the parameter's purpose and format. Provide examples and constraints ("The city and state, e.g., 'San Francisco, CA' or a zip code e.g., '95616'."). + - `enum`(array, optional): If the parameter values are from a fixed set, use "enum" to list the allowed values instead of just describing them in the description. This improves accuracy ("enum": \["daylight", "cool", "warm"\]). + - `required`(array): An array of strings listing the parameter names that are mandatory for the function to operate. + +You can also construct`FunctionDeclarations`from Python functions directly using`types.FunctionDeclaration.from_callable(client=client, callable=your_function)`. + +## Function calling with thinking models + +Gemini 3 and 2.5 series models use an internal["thinking"](https://ai.google.dev/gemini-api/docs/thinking)process to reason through requests. This significantly improves function calling performance, allowing the model to better determine when to call a function and which parameters to use. Because the Gemini API is stateless, models use[thought signatures](https://ai.google.dev/gemini-api/docs/thought-signatures)to maintain context across multi-turn conversations. + +This section covers advanced management of thought signatures and is only necessary if you're manually constructing API requests (e.g., via REST) or manipulating conversation history. + +**If you're using the[Google GenAI SDKs](https://ai.google.dev/gemini-api/docs/libraries)(our official libraries), you don't need to manage this process** . 
The SDKs automatically handle the necessary steps, as shown in the earlier[example](https://ai.google.dev/gemini-api/docs/function-calling#step-4). + +### Managing conversation history manually + +If you modify the conversation history manually, instead of sending the[complete previous response](https://ai.google.dev/gemini-api/docs/function-calling#step-4)you must correctly handle the`thought_signature`included in the model's turn. + +Follow these rules to ensure the model's context is preserved: + +- Always send the`thought_signature`back to the model inside its original[`Part`](https://ai.google.dev/api#request-body-structure). +- Don't merge a`Part`containing a signature with one that does not. This breaks the positional context of the thought. +- Don't combine two`Parts`that both contain signatures, as the signature strings cannot be merged. + +#### Gemini 3 thought signatures + +In Gemini 3, any[`Part`](https://ai.google.dev/api#request-body-structure)of a model response may contain a thought signature. While we generally recommend returning signatures from all`Part`types, passing back thought signatures is mandatory for function calling. Unless you are manipulating conversation history manually, the Google GenAI SDK will handle thought signatures automatically. + +If you are manipulating conversation history manually, refer to the[Thoughts Signatures](https://ai.google.dev/gemini-api/docs/thought-signatures)page for complete guidance and details on handling thought signatures for Gemini 3. + +### Inspecting thought signatures + +While not necessary for implementation, you can inspect the response to see the`thought_signature`for debugging or educational purposes. + +### Python + + import base64 + # After receiving a response from a model with thinking enabled + # response = client.models.generate_content(...) 
+ + # The signature is attached to the response part containing the function call + part = response.candidates[0].content.parts[0] + if part.thought_signature: + print(base64.b64encode(part.thought_signature).decode("utf-8")) + +### JavaScript + + // After receiving a response from a model with thinking enabled + // const response = await ai.models.generateContent(...) + + // The signature is attached to the response part containing the function call + const part = response.candidates[0].content.parts[0]; + if (part.thoughtSignature) { + console.log(part.thoughtSignature); + } + +Learn more about limitations and usage of thought signatures, and about thinking models in general, on the[Thinking](https://ai.google.dev/gemini-api/docs/thinking#signatures)page. + +## Parallel function calling + +In addition to single turn function calling, you can also call multiple functions at once. Parallel function calling lets you execute multiple functions at once and is used when the functions are not dependent on each other. This is useful in scenarios like gathering data from multiple independent sources, such as retrieving customer details from different databases or checking inventory levels across various warehouses or performing multiple actions such as converting your apartment into a disco. 
+ +### Python + + power_disco_ball = { + "name": "power_disco_ball", + "description": "Powers the spinning disco ball.", + "parameters": { + "type": "object", + "properties": { + "power": { + "type": "boolean", + "description": "Whether to turn the disco ball on or off.", + } + }, + "required": ["power"], + }, + } + + start_music = { + "name": "start_music", + "description": "Play some music matching the specified parameters.", + "parameters": { + "type": "object", + "properties": { + "energetic": { + "type": "boolean", + "description": "Whether the music is energetic or not.", + }, + "loud": { + "type": "boolean", + "description": "Whether the music is loud or not.", + }, + }, + "required": ["energetic", "loud"], + }, + } + + dim_lights = { + "name": "dim_lights", + "description": "Dim the lights.", + "parameters": { + "type": "object", + "properties": { + "brightness": { + "type": "number", + "description": "The brightness of the lights, 0.0 is off, 1.0 is full.", + } + }, + "required": ["brightness"], + }, + } + +### JavaScript + + import { Type } from '@google/genai'; + + const powerDiscoBall = { + name: 'power_disco_ball', + description: 'Powers the spinning disco ball.', + parameters: { + type: Type.OBJECT, + properties: { + power: { + type: Type.BOOLEAN, + description: 'Whether to turn the disco ball on or off.' + } + }, + required: ['power'] + } + }; + + const startMusic = { + name: 'start_music', + description: 'Play some music matching the specified parameters.', + parameters: { + type: Type.OBJECT, + properties: { + energetic: { + type: Type.BOOLEAN, + description: 'Whether the music is energetic or not.' + }, + loud: { + type: Type.BOOLEAN, + description: 'Whether the music is loud or not.' 
+ } + }, + required: ['energetic', 'loud'] + } + }; + + const dimLights = { + name: 'dim_lights', + description: 'Dim the lights.', + parameters: { + type: Type.OBJECT, + properties: { + brightness: { + type: Type.NUMBER, + description: 'The brightness of the lights, 0.0 is off, 1.0 is full.' + } + }, + required: ['brightness'] + } + }; + +Configure the function calling mode to allow using all of the specified tools. To learn more, you can read about[configuring function calling](https://ai.google.dev/gemini-api/docs/function-calling#function_calling_modes). + +### Python + + from google import genai + from google.genai import types + + # Configure the client and tools + client = genai.Client() + house_tools = [ + types.Tool(function_declarations=[power_disco_ball, start_music, dim_lights]) + ] + config = types.GenerateContentConfig( + tools=house_tools, + automatic_function_calling=types.AutomaticFunctionCallingConfig( + disable=True + ), + # Force the model to call 'any' function, instead of chatting. + tool_config=types.ToolConfig( + function_calling_config=types.FunctionCallingConfig(mode='ANY') + ), + ) + + chat = client.chats.create(model="gemini-2.5-flash", config=config) + response = chat.send_message("Turn this place into a party!") + + # Print out each of the function calls requested from this single call + print("Example 1: Forced function calling") + for fn in response.function_calls: + args = ", ".join(f"{key}={val}" for key, val in fn.args.items()) + print(f"{fn.name}({args})") + +### JavaScript + + import { GoogleGenAI } from '@google/genai'; + + // Set up function declarations + const houseFns = [powerDiscoBall, startMusic, dimLights]; + + const config = { + tools: [{ + functionDeclarations: houseFns + }], + // Force the model to call 'any' function, instead of chatting. 
+ toolConfig: { + functionCallingConfig: { + mode: 'any' + } + } + }; + + // Configure the client + const ai = new GoogleGenAI({}); + + // Create a chat session + const chat = ai.chats.create({ + model: 'gemini-2.5-flash', + config: config + }); + const response = await chat.sendMessage({message: 'Turn this place into a party!'}); + + // Print out each of the function calls requested from this single call + console.log("Example 1: Forced function calling"); + for (const fn of response.functionCalls) { + const args = Object.entries(fn.args) + .map(([key, val]) => `${key}=${val}`) + .join(', '); + console.log(`${fn.name}(${args})`); + } + +Each of the printed results reflects a single function call that the model has requested. To send the results back, include the responses in the same order as they were requested. + +The Python SDK supports[automatic function calling](https://ai.google.dev/gemini-api/docs/function-calling#automatic_function_calling_python_only), which automatically converts Python functions to declarations, handles the function call execution and response cycle for you. Following is an example for the disco use case. +**Note:** Automatic Function Calling is a Python SDK only feature at the moment. + +### Python + + from google import genai + from google.genai import types + + # Actual function implementations + def power_disco_ball_impl(power: bool) -> dict: + """Powers the spinning disco ball. + + Args: + power: Whether to turn the disco ball on or off. + + Returns: + A status dictionary indicating the current state. + """ + return {"status": f"Disco ball powered {'on' if power else 'off'}"} + + def start_music_impl(energetic: bool, loud: bool) -> dict: + """Play some music matching the specified parameters. + + Args: + energetic: Whether the music is energetic or not. + loud: Whether the music is loud or not. + + Returns: + A dictionary containing the music settings. 
+ """ + music_type = "energetic" if energetic else "chill" + volume = "loud" if loud else "quiet" + return {"music_type": music_type, "volume": volume} + + def dim_lights_impl(brightness: float) -> dict: + """Dim the lights. + + Args: + brightness: The brightness of the lights, 0.0 is off, 1.0 is full. + + Returns: + A dictionary containing the new brightness setting. + """ + return {"brightness": brightness} + + # Configure the client + client = genai.Client() + config = types.GenerateContentConfig( + tools=[power_disco_ball_impl, start_music_impl, dim_lights_impl] + ) + + # Make the request + response = client.models.generate_content( + model="gemini-2.5-flash", + contents="Do everything you need to this place into party!", + config=config, + ) + + print("\nExample 2: Automatic function calling") + print(response.text) + # I've turned on the disco ball, started playing loud and energetic music, and dimmed the lights to 50% brightness. Let's get this party started! + +## Compositional function calling + +Compositional or sequential function calling allows Gemini to chain multiple function calls together to fulfill a complex request. For example, to answer "Get the temperature in my current location", the Gemini API might first invoke a`get_current_location()`function followed by a`get_weather()`function that takes the location as a parameter. + +The following example demonstrates how to implement compositional function calling using the Python SDK and automatic function calling. + +### Python + +This example uses the automatic function calling feature of the`google-genai`Python SDK. The SDK automatically converts the Python functions to the required schema, executes the function calls when requested by the model, and sends the results back to the model to complete the task. 
+ + import os + from google import genai + from google.genai import types + + # Example Functions + def get_weather_forecast(location: str) -> dict: + """Gets the current weather temperature for a given location.""" + print(f"Tool Call: get_weather_forecast(location={location})") + # TODO: Make API call + print("Tool Response: {'temperature': 25, 'unit': 'celsius'}") + return {"temperature": 25, "unit": "celsius"} # Dummy response + + def set_thermostat_temperature(temperature: int) -> dict: + """Sets the thermostat to a desired temperature.""" + print(f"Tool Call: set_thermostat_temperature(temperature={temperature})") + # TODO: Interact with a thermostat API + print("Tool Response: {'status': 'success'}") + return {"status": "success"} + + # Configure the client and model + client = genai.Client() + config = types.GenerateContentConfig( + tools=[get_weather_forecast, set_thermostat_temperature] + ) + + # Make the request + response = client.models.generate_content( + model="gemini-2.5-flash", + contents="If it's warmer than 20°C in London, set the thermostat to 20°C, otherwise set it to 18°C.", + config=config, + ) + + # Print the final, user-facing response + print(response.text) + +**Expected Output** + +When you run the code, you will see the SDK orchestrating the function calls. The model first calls`get_weather_forecast`, receives the temperature, and then calls`set_thermostat_temperature`with the correct value based on the logic in the prompt. + + Tool Call: get_weather_forecast(location=London) + Tool Response: {'temperature': 25, 'unit': 'celsius'} + Tool Call: set_thermostat_temperature(temperature=20) + Tool Response: {'status': 'success'} + OK. I've set the thermostat to 20°C. + +### JavaScript + +This example shows how to use JavaScript/TypeScript SDK to do comopositional function calling using a manual execution loop. 
+ + import { GoogleGenAI, Type } from "@google/genai"; + + // Configure the client + const ai = new GoogleGenAI({}); + + // Example Functions + function get_weather_forecast({ location }) { + console.log(`Tool Call: get_weather_forecast(location=${location})`); + // TODO: Make API call + console.log("Tool Response: {'temperature': 25, 'unit': 'celsius'}"); + return { temperature: 25, unit: "celsius" }; + } + + function set_thermostat_temperature({ temperature }) { + console.log( + `Tool Call: set_thermostat_temperature(temperature=${temperature})`, + ); + // TODO: Make API call + console.log("Tool Response: {'status': 'success'}"); + return { status: "success" }; + } + + const toolFunctions = { + get_weather_forecast, + set_thermostat_temperature, + }; + + const tools = [ + { + functionDeclarations: [ + { + name: "get_weather_forecast", + description: + "Gets the current weather temperature for a given location.", + parameters: { + type: Type.OBJECT, + properties: { + location: { + type: Type.STRING, + }, + }, + required: ["location"], + }, + }, + { + name: "set_thermostat_temperature", + description: "Sets the thermostat to a desired temperature.", + parameters: { + type: Type.OBJECT, + properties: { + temperature: { + type: Type.NUMBER, + }, + }, + required: ["temperature"], + }, + }, + ], + }, + ]; + + // Prompt for the model + let contents = [ + { + role: "user", + parts: [ + { + text: "If it's warmer than 20°C in London, set the thermostat to 20°C, otherwise set it to 18°C.", + }, + ], + }, + ]; + + // Loop until the model has no more function calls to make + while (true) { + const result = await ai.models.generateContent({ + model: "gemini-2.5-flash", + contents, + config: { tools }, + }); + + if (result.functionCalls && result.functionCalls.length > 0) { + const functionCall = result.functionCalls[0]; + + const { name, args } = functionCall; + + if (!toolFunctions[name]) { + throw new Error(`Unknown function call: ${name}`); + } + + // Call the function and 
get the response. + const toolResponse = toolFunctions[name](args); + + const functionResponsePart = { + name: functionCall.name, + response: { + result: toolResponse, + }, + }; + + // Send the function response back to the model. + contents.push({ + role: "model", + parts: [ + { + functionCall: functionCall, + }, + ], + }); + contents.push({ + role: "user", + parts: [ + { + functionResponse: functionResponsePart, + }, + ], + }); + } else { + // No more function calls, break the loop. + console.log(result.text); + break; + } + } + +**Expected Output** + +When you run the code, you will see the SDK orchestrating the function calls. The model first calls`get_weather_forecast`, receives the temperature, and then calls`set_thermostat_temperature`with the correct value based on the logic in the prompt. + + Tool Call: get_weather_forecast(location=London) + Tool Response: {'temperature': 25, 'unit': 'celsius'} + Tool Call: set_thermostat_temperature(temperature=20) + Tool Response: {'status': 'success'} + OK. It's 25°C in London, so I've set the thermostat to 20°C. + +Compositional function calling is a native[Live API](https://ai.google.dev/gemini-api/docs/live)feature. This means Live API can handle the function calling similar to the Python SDK. + +### Python + + # Light control schemas + turn_on_the_lights_schema = {'name': 'turn_on_the_lights'} + turn_off_the_lights_schema = {'name': 'turn_off_the_lights'} + + prompt = """ + Hey, can you write run some python code to turn on the lights, wait 10s and then turn off the lights? 
+ """ + + tools = [ + {'code_execution': {}}, + {'function_declarations': [turn_on_the_lights_schema, turn_off_the_lights_schema]} + ] + + await run(prompt, tools=tools, modality="AUDIO") + +### JavaScript + + // Light control schemas + const turnOnTheLightsSchema = { name: 'turn_on_the_lights' }; + const turnOffTheLightsSchema = { name: 'turn_off_the_lights' }; + + const prompt = ` + Hey, can you write run some python code to turn on the lights, wait 10s and then turn off the lights? + `; + + const tools = [ + { codeExecution: {} }, + { functionDeclarations: [turnOnTheLightsSchema, turnOffTheLightsSchema] } + ]; + + await run(prompt, tools=tools, modality="AUDIO") + +## Function calling modes + +The Gemini API lets you control how the model uses the provided tools (function declarations). Specifically, you can set the mode within the.`function_calling_config`. + +- `AUTO (Default)`: The model decides whether to generate a natural language response or suggest a function call based on the prompt and context. This is the most flexible mode and recommended for most scenarios. +- `ANY`: The model is constrained to always predict a function call and guarantees function schema adherence. If`allowed_function_names`is not specified, the model can choose from any of the provided function declarations. If`allowed_function_names`is provided as a list, the model can only choose from the functions in that list. Use this mode when you require a function call response to every prompt (if applicable). +- `NONE`: The model is*prohibited*from making function calls. This is equivalent to sending a request without any function declarations. Use this to temporarily disable function calling without removing your tool definitions. +- `VALIDATED`(Preview): The model is constrained to predict either function calls or natural language, and ensures function schema adherence. If`allowed_function_names`is not provided, the model picks from all of the available function declarations. 
If`allowed_function_names`is provided, the model picks from the set of allowed functions. + +### Python + + from google.genai import types + + # Configure function calling mode + tool_config = types.ToolConfig( + function_calling_config=types.FunctionCallingConfig( + mode="ANY", allowed_function_names=["get_current_temperature"] + ) + ) + + # Create the generation config + config = types.GenerateContentConfig( + tools=[tools], # not defined here. + tool_config=tool_config, + ) + +### JavaScript + + import { FunctionCallingConfigMode } from '@google/genai'; + + // Configure function calling mode + const toolConfig = { + functionCallingConfig: { + mode: FunctionCallingConfigMode.ANY, + allowedFunctionNames: ['get_current_temperature'] + } + }; + + // Create the generation config + const config = { + tools: tools, // not defined here. + toolConfig: toolConfig, + }; + +## Automatic function calling (Python only) + +When using the Python SDK, you can provide Python functions directly as tools. The SDK converts these functions into declarations, manages the function call execution, and handles the response cycle for you. Define your function with type hints and a docstring. For optimal results, it is recommended to use[Google-style docstrings.](https://google.github.io/styleguide/pyguide.html#383-functions-and-methods)The SDK will then automatically: + +1. Detect function call responses from the model. +2. Call the corresponding Python function in your code. +3. Send the function's response back to the model. +4. Return the model's final text response. + +The SDK currently does not parse argument descriptions into the property description slots of the generated function declaration. Instead, it sends the entire docstring as the top-level function description. 
+ +### Python + + from google import genai + from google.genai import types + + # Define the function with type hints and docstring + def get_current_temperature(location: str) -> dict: + """Gets the current temperature for a given location. + + Args: + location: The city and state, e.g. San Francisco, CA + + Returns: + A dictionary containing the temperature and unit. + """ + # ... (implementation) ... + return {"temperature": 25, "unit": "Celsius"} + + # Configure the client + client = genai.Client() + config = types.GenerateContentConfig( + tools=[get_current_temperature] + ) # Pass the function itself + + # Make the request + response = client.models.generate_content( + model="gemini-2.5-flash", + contents="What's the temperature in Boston?", + config=config, + ) + + print(response.text) # The SDK handles the function call and returns the final text + +You can disable automatic function calling with: + +### Python + + config = types.GenerateContentConfig( + tools=[get_current_temperature], + automatic_function_calling=types.AutomaticFunctionCallingConfig(disable=True) + ) + +### Automatic function schema declaration + +The API is able to describe any of the following types.`Pydantic`types are allowed, as long as the fields defined on them are also composed of allowed types. Dict types (like`dict[str: int]`) are not well supported here, don't use them. + +### Python + + AllowedType = ( + int | float | bool | str | list['AllowedType'] | pydantic.BaseModel) + +To see what the inferred schema looks like, you can convert it using[`from_callable`](https://googleapis.github.io/python-genai/genai.html#genai.types.FunctionDeclaration.from_callable): + +### Python + + from google import genai + from google.genai import types + + def multiply(a: float, b: float): + """Returns a * b.""" + return a * b + + client = genai.Client() + fn_decl = types.FunctionDeclaration.from_callable(callable=multiply, client=client) + + # to_json_dict() provides a clean JSON representation. 
+ print(fn_decl.to_json_dict()) + +## Multi-tool use: Combine native tools with function calling + +You can enable multiple tools combining native tools with function calling at the same time. Here's an example that enables two tools,[Grounding with Google Search](https://ai.google.dev/gemini-api/docs/grounding)and[code execution](https://ai.google.dev/gemini-api/docs/code-execution), in a request using the[Live API](https://ai.google.dev/gemini-api/docs/live). +**Note:** Multi-tool use is a-[Live API](https://ai.google.dev/gemini-api/docs/live)only feature at the moment. The`run()`function declaration, which handles the asynchronous websocket setup, is omitted for brevity. + +### Python + + # Multiple tasks example - combining lights, code execution, and search + prompt = """ + Hey, I need you to do three things for me. + + 1. Turn on the lights. + 2. Then compute the largest prime palindrome under 100000. + 3. Then use Google Search to look up information about the largest earthquake in California the week of Dec 5 2024. + + Thanks! + """ + + tools = [ + {'google_search': {}}, + {'code_execution': {}}, + {'function_declarations': [turn_on_the_lights_schema, turn_off_the_lights_schema]} # not defined here. + ] + + # Execute the prompt with specified tools in audio modality + await run(prompt, tools=tools, modality="AUDIO") + +### JavaScript + + // Multiple tasks example - combining lights, code execution, and search + const prompt = ` + Hey, I need you to do three things for me. + + 1. Turn on the lights. + 2. Then compute the largest prime palindrome under 100000. + 3. Then use Google Search to look up information about the largest earthquake in California the week of Dec 5 2024. + + Thanks! + `; + + const tools = [ + { googleSearch: {} }, + { codeExecution: {} }, + { functionDeclarations: [turnOnTheLightsSchema, turnOffTheLightsSchema] } // not defined here. 
+ ]; + + // Execute the prompt with specified tools in audio modality + await run(prompt, {tools: tools, modality: "AUDIO"}); + +Python developers can try this out in the[Live API Tool Use notebook](https://colab.research.google.com/github/google-gemini/cookbook/blob/main/quickstarts/Get_started_LiveAPI_tools.ipynb). + +## Multimodal function responses + +| **Note:** This feature is available for[Gemini 3](https://ai.google.dev/gemini-api/docs/gemini-3)series models. + +For Gemini 3 series models, you can include multimodal content in the function response parts that you send to the model. The model can process this multimodal content in its next turn to produce a more informed response. The following MIME types are supported for multimodal content in function responses: + +- **Images** :`image/png`,`image/jpeg`,`image/webp` +- **Documents** :`application/pdf`,`text/plain` + +To include multimodal data in a function response, include it as one or more parts nested within the`functionResponse`part. Each multimodal part must contain`inlineData`. If you reference a multimodal part from within the structured`response`field, it must contain a unique`displayName`. + +You can also reference a multimodal part from within the structured`response`field of the`functionResponse`part by using the JSON reference format`{"$ref": ""}`. The model substitutes the reference with the multimodal content when processing the response. Each`displayName`can only be referenced once in the structured`response`field. + +The following example shows a message containing a`functionResponse`for a function named`get_image`and a nested part containing image data with`displayName: "wakeupcat.jpg"`. The`functionResponse`'s`response`field references this image part: + +### Python + + from google import genai + from google.genai import types + + client = genai.Client() + + # This is a manual, two turn multimodal function calling workflow: + + # 1. 
Define the function tool + get_image_declaration = types.FunctionDeclaration( + name="get_image", + description="Retrieves the image file reference for a specific order item.", + parameters={ + "type": "object", + "properties": { + "item_name": { + "type": "string", + "description": "The name or description of the item ordered (e.g., 'green shirt')." + } + }, + "required": ["item_name"], + }, + ) + tool_config = types.Tool(function_declarations=[get_image_declaration]) + + # 2. Send a message that triggers the tool + prompt = "Show me the green shirt I ordered last month." + response_1 = client.models.generate_content( + model="gemini-3-flash-preview", + contents=[prompt], + config=types.GenerateContentConfig( + tools=[tool_config], + ) + ) + + # 3. Handle the function call + function_call = response_1.function_calls[0] + requested_item = function_call.args["item_name"] + print(f"Model wants to call: {function_call.name}") + + # Execute your tool (e.g., call an API) + # (This is a mock response for the example) + print(f"Calling external tool for: {requested_item}") + + function_response_data = { + "image_ref": {"$ref": "dress.jpg"}, + } + + function_response_multimodal_data = types.FunctionResponsePart( + file_data=types.FunctionResponseFileData( + mime_type="image/png", + display_name="dress.jpg", + file_uri="gs://cloud-samples-data/generative-ai/image/dress.jpg", + ) + ) + + # 4. Send the tool's result back + # Append this turn's messages to history for a final response. 
+ history = [ + types.Content(role="user", parts=[types.Part(text=prompt)]), + response_1.candidates[0].content, + types.Content( + role="tool", + parts=[ + types.Part.from_function_response( + name=function_call.name, + response=function_response_data, + parts=[function_response_multimodal_data] + ) + ], + ) + ] + + response_2 = client.models.generate_content( + model="gemini-3-flash-preview", + contents=history, + config=types.GenerateContentConfig( + tools=[tool_config], + thinking_config=types.ThinkingConfig(include_thoughts=True) + ), + ) + + print(f"\nFinal model response: {response_2.text}") + +### JavaScript + + import { GoogleGenAI, Type } from '@google/genai'; + + const client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY }); + + // This is a manual, two turn multimodal function calling workflow: + // 1. Define the function tool + const getImageDeclaration = { + name: 'get_image', + description: 'Retrieves the image file reference for a specific order item.', + parameters: { + type: Type.OBJECT, + properties: { + item_name: { + type: Type.STRING, + description: "The name or description of the item ordered (e.g., 'green shirt').", + }, + }, + required: ['item_name'], + }, + }; + + const toolConfig = { + functionDeclarations: [getImageDeclaration], + }; + + // 2. Send a message that triggers the tool + const prompt = 'Show me the green shirt I ordered last month.'; + const response1 = await client.models.generateContent({ + model: 'gemini-3-flash-preview', + contents: prompt, + config: { + tools: [toolConfig], + }, + }); + + // 3. 
Handle the function call + const functionCall = response1.functionCalls[0]; + const requestedItem = functionCall.args.item_name; + console.log(`Model wants to call: ${functionCall.name}`); + + // Execute your tool (e.g., call an API) + // (This is a mock response for the example) + console.log(`Calling external tool for: ${requestedItem}`); + + const functionResponseData = { + image_ref: { $ref: 'dress.jpg' }, + }; + + const functionResponseMultimodalData = { + fileData: { + mimeType: 'image/png', + displayName: 'dress.jpg', + fileUri: 'gs://cloud-samples-data/generative-ai/image/dress.jpg', + }, + }; + + // 4. Send the tool's result back + // Append this turn's messages to history for a final response. + const history = [ + { role: 'user', parts: [{ text: prompt }] }, + response1.candidates[0].content, + { + role: 'tool', + parts: [ + { + functionResponse: { + name: functionCall.name, + response: functionResponseData, + parts: [functionResponseMultimodalData], + }, + }, + ], + }, + ]; + + const response2 = await client.models.generateContent({ + model: 'gemini-3-flash-preview', + contents: history, + config: { + tools: [toolConfig], + thinkingConfig: { includeThoughts: true }, + }, + }); + + console.log(`\nFinal model response: ${response2.text}`); + +### REST + + "contents": [ + ..., + { + "role": "user", + "parts": [ + { + "functionResponse": { + "name": "get_image", + "response": { + "image_ref": { + "$ref": "wakeupcat.jpg" + } + }, + "parts": [ + { + "fileData": { + "displayName": "wakeupcat.jpg", + "mimeType": "image/jpeg", + "fileUri": "gs://cloud-samples-data/vision/label/wakeupcat.jpg" + } + } + ] + } + } + ] + } + ] + +## Model context protocol (MCP) + +[Model Context Protocol (MCP)](https://modelcontextprotocol.io/introduction)is an open standard for connecting AI applications with external tools and data. MCP provides a common protocol for models to access context, such as functions (tools), data sources (resources), or predefined prompts. 
+ +The Gemini SDKs have built-in support for the MCP, reducing boilerplate code and offering[automatic tool calling](https://ai.google.dev/gemini-api/docs/function-calling#automatic_function_calling_python_only)for MCP tools. When the model generates an MCP tool call, the Python and JavaScript client SDK can automatically execute the MCP tool and send the response back to the model in a subsequent request, continuing this loop until no more tool calls are made by the model. + +Here, you can find an example of how to use a local MCP server with Gemini and`mcp`SDK. + +### Python + +Make sure the latest version of the[`mcp`SDK](https://modelcontextprotocol.io/introduction)is installed on your platform of choice. + + pip install mcp + +**Note:** Python supports automatic tool calling by passing in the`ClientSession`into the`tools`parameters. If you want to disable it, you can provide`automatic_function_calling`with disabled`True`. + + import os + import asyncio + from datetime import datetime + from mcp import ClientSession, StdioServerParameters + from mcp.client.stdio import stdio_client + from google import genai + + client = genai.Client() + + # Create server parameters for stdio connection + server_params = StdioServerParameters( + command="npx", # Executable + args=["-y", "@philschmid/weather-mcp"], # MCP Server + env=None, # Optional environment variables + ) + + async def run(): + async with stdio_client(server_params) as (read, write): + async with ClientSession(read, write) as session: + # Prompt to get the weather for the current day in London. + prompt = f"What is the weather in London in {datetime.now().strftime('%Y-%m-%d')}?" 
+ + # Initialize the connection between client and server + await session.initialize() + + # Send request to the model with MCP function declarations + response = await client.aio.models.generate_content( + model="gemini-2.5-flash", + contents=prompt, + config=genai.types.GenerateContentConfig( + temperature=0, + tools=[session], # uses the session, will automatically call the tool + # Uncomment if you **don't** want the SDK to automatically call the tool + # automatic_function_calling=genai.types.AutomaticFunctionCallingConfig( + # disable=True + # ), + ), + ) + print(response.text) + + # Start the asyncio event loop and run the main function + asyncio.run(run()) + +### JavaScript + +Make sure the latest version of the`mcp`SDK is installed on your platform of choice. + + npm install @modelcontextprotocol/sdk + +**Note:** JavaScript supports automatic tool calling by wrapping the`client`with`mcpToTool`. If you want to disable it, you can provide`automaticFunctionCalling`with disabled`true`. 
+ + import { GoogleGenAI, FunctionCallingConfigMode , mcpToTool} from '@google/genai'; + import { Client } from "@modelcontextprotocol/sdk/client/index.js"; + import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js"; + + // Create server parameters for stdio connection + const serverParams = new StdioClientTransport({ + command: "npx", // Executable + args: ["-y", "@philschmid/weather-mcp"] // MCP Server + }); + + const client = new Client( + { + name: "example-client", + version: "1.0.0" + } + ); + + // Configure the client + const ai = new GoogleGenAI({}); + + // Initialize the connection between client and server + await client.connect(serverParams); + + // Send request to the model with MCP tools + const response = await ai.models.generateContent({ + model: "gemini-2.5-flash", + contents: `What is the weather in London in ${new Date().toLocaleDateString()}?`, + config: { + tools: [mcpToTool(client)], // uses the session, will automatically call the tool + // Uncomment if you **don't** want the sdk to automatically call the tool + // automaticFunctionCalling: { + // disable: true, + // }, + }, + }); + console.log(response.text) + + // Close the connection + await client.close(); + +### Limitations with built-in MCP support + +Built-in MCP support is a[experimental](https://ai.google.dev/gemini-api/docs/models#preview)feature in our SDKs and has the following limitations: + +- Only tools are supported, not resources nor prompts +- It is available for the Python and JavaScript/TypeScript SDK. +- Breaking changes might occur in future releases. + +Manual integration of MCP servers is always an option if these limit what you're building. + +## Supported models + +This section lists models and their function calling capabilities. Experimental models are not included. You can find a comprehensive capabilities overview on the[model overview](https://ai.google.dev/gemini-api/docs/models)page. 
+ +| Model | Function Calling | Parallel Function Calling | Compositional Function Calling | +|-----------------------|------------------|---------------------------|--------------------------------| +| Gemini 3 Pro | ✔️ | ✔️ | ✔️ | +| Gemini 3 Flash | ✔️ | ✔️ | ✔️ | +| Gemini 2.5 Pro | ✔️ | ✔️ | ✔️ | +| Gemini 2.5 Flash | ✔️ | ✔️ | ✔️ | +| Gemini 2.5 Flash-Lite | ✔️ | ✔️ | ✔️ | +| Gemini 2.0 Flash | ✔️ | ✔️ | ✔️ | +| Gemini 2.0 Flash-Lite | X | X | X | + +## Best practices + +- **Function and Parameter Descriptions:**Be extremely clear and specific in your descriptions. The model relies on these to choose the correct function and provide appropriate arguments. +- **Naming:**Use descriptive function names (without spaces, periods, or dashes). +- **Strong Typing:**Use specific types (integer, string, enum) for parameters to reduce errors. If a parameter has a limited set of valid values, use an enum. +- **Tool Selection:**While the model can use an arbitrary number of tools, providing too many can increase the risk of selecting an incorrect or suboptimal tool. For best results, aim to provide only the relevant tools for the context or task, ideally keeping the active set to a maximum of 10-20. Consider dynamic tool selection based on conversation context if you have a large total number of tools. +- **Prompt Engineering:** + - Provide context: Tell the model its role (e.g., "You are a helpful weather assistant."). + - Give instructions: Specify how and when to use functions (e.g., "Don't guess dates; always use a future date for forecasts."). + - Encourage clarification: Instruct the model to ask clarifying questions if needed. + - See[Agentic workflows](https://ai.google.dev/gemini-api/docs/prompting-strategies#agentic-workflows)for further strategies on designing these prompts. Here is an example of a tested[system instruction](https://ai.google.dev/gemini-api/docs/prompting-strategies#agentic-si-template). 
+- **Temperature:**Use a low temperature (e.g., 0) for more deterministic and reliable function calls.
+
+  | When using Gemini 3 models, we strongly recommend keeping the`temperature`at its default value of 1.0. Changing the temperature (setting it below 1.0) may lead to unexpected behavior, such as looping or degraded performance, particularly in complex mathematical or reasoning tasks.
+- **Validation:**If a function call has significant consequences (e.g., placing an order), validate the call with the user before executing it.
+
+- **Check Finish Reason:** Always check the[`finishReason`](https://ai.google.dev/api/generate-content#FinishReason)in the model's response to handle cases where the model failed to generate a valid function call.
+
+- **Error Handling**: Implement robust error handling in your functions to gracefully handle unexpected inputs or API failures. Return informative error messages that the model can use to generate helpful responses to the user.
+
+- **Security:**Be mindful of security when calling external APIs. Use appropriate authentication and authorization mechanisms. Avoid exposing sensitive data in function calls.
+
+- **Token Limits:**Function descriptions and parameters count towards your input token limit. If you're hitting token limits, consider limiting the number of functions or the length of the descriptions, or break down complex tasks into smaller, more focused function sets.
+
+## Notes and limitations
+
+- Only a[subset of the OpenAPI schema](https://ai.google.dev/api/caching#FunctionDeclaration)is supported.
+- Supported parameter types in Python are limited.
+- Automatic function calling is a Python SDK feature only.
\ No newline at end of file diff --git a/Source/WulaFallenEmpire/WulaAI_DevDocs/google/Structured-Outputs.md b/Source/WulaFallenEmpire/WulaAI_DevDocs/google/Structured-Outputs.md new file mode 100644 index 00000000..430459dc --- /dev/null +++ b/Source/WulaFallenEmpire/WulaAI_DevDocs/google/Structured-Outputs.md @@ -0,0 +1,533 @@ +
+ +You can configure Gemini models to generate responses that adhere to a provided JSON Schema. This capability guarantees predictable and parsable results, ensures format and type-safety, enables the programmatic detection of refusals, and simplifies prompting. + +Using structured outputs is ideal for a wide range of applications: + +- **Data extraction:**Pull specific information from unstructured text, like extracting names, dates, and amounts from an invoice. +- **Structured classification:**Classify text into predefined categories and assign structured labels, such as categorizing customer feedback by sentiment and topic. +- **Agentic workflows:**Generate structured data that can be used to call other tools or APIs, like creating a character sheet for a game or filling out a form. + +In addition to supporting JSON Schema in the REST API, the Google GenAI SDKs for Python and JavaScript also make it easy to define object schemas using[Pydantic](https://docs.pydantic.dev/latest/)and[Zod](https://zod.dev/), respectively. The example below demonstrates how to extract information from unstructured text that conforms to a schema defined in code. + +Recipe ExtractorContent ModerationRecursive Structures + +This example demonstrates how to extract structured data from text using basic JSON Schema types like`object`,`array`,`string`, and`integer`. 
+ +### Python + + from google import genai + from pydantic import BaseModel, Field + from typing import List, Optional + + class Ingredient(BaseModel): + name: str = Field(description="Name of the ingredient.") + quantity: str = Field(description="Quantity of the ingredient, including units.") + + class Recipe(BaseModel): + recipe_name: str = Field(description="The name of the recipe.") + prep_time_minutes: Optional[int] = Field(description="Optional time in minutes to prepare the recipe.") + ingredients: List[Ingredient] + instructions: List[str] + + client = genai.Client() + + prompt = """ + Please extract the recipe from the following text. + The user wants to make delicious chocolate chip cookies. + They need 2 and 1/4 cups of all-purpose flour, 1 teaspoon of baking soda, + 1 teaspoon of salt, 1 cup of unsalted butter (softened), 3/4 cup of granulated sugar, + 3/4 cup of packed brown sugar, 1 teaspoon of vanilla extract, and 2 large eggs. + For the best part, they'll need 2 cups of semisweet chocolate chips. + First, preheat the oven to 375°F (190°C). Then, in a small bowl, whisk together the flour, + baking soda, and salt. In a large bowl, cream together the butter, granulated sugar, and brown sugar + until light and fluffy. Beat in the vanilla and eggs, one at a time. Gradually beat in the dry + ingredients until just combined. Finally, stir in the chocolate chips. Drop by rounded tablespoons + onto ungreased baking sheets and bake for 9 to 11 minutes. 
+ """ + + response = client.models.generate_content( + model="gemini-2.5-flash", + contents=prompt, + config={ + "response_mime_type": "application/json", + "response_json_schema": Recipe.model_json_schema(), + }, + ) + + recipe = Recipe.model_validate_json(response.text) + print(recipe) + +### JavaScript + + import { GoogleGenAI } from "@google/genai"; + import { z } from "zod"; + import { zodToJsonSchema } from "zod-to-json-schema"; + + const ingredientSchema = z.object({ + name: z.string().describe("Name of the ingredient."), + quantity: z.string().describe("Quantity of the ingredient, including units."), + }); + + const recipeSchema = z.object({ + recipe_name: z.string().describe("The name of the recipe."), + prep_time_minutes: z.number().optional().describe("Optional time in minutes to prepare the recipe."), + ingredients: z.array(ingredientSchema), + instructions: z.array(z.string()), + }); + + const ai = new GoogleGenAI({}); + + const prompt = ` + Please extract the recipe from the following text. + The user wants to make delicious chocolate chip cookies. + They need 2 and 1/4 cups of all-purpose flour, 1 teaspoon of baking soda, + 1 teaspoon of salt, 1 cup of unsalted butter (softened), 3/4 cup of granulated sugar, + 3/4 cup of packed brown sugar, 1 teaspoon of vanilla extract, and 2 large eggs. + For the best part, they'll need 2 cups of semisweet chocolate chips. + First, preheat the oven to 375°F (190°C). Then, in a small bowl, whisk together the flour, + baking soda, and salt. In a large bowl, cream together the butter, granulated sugar, and brown sugar + until light and fluffy. Beat in the vanilla and eggs, one at a time. Gradually beat in the dry + ingredients until just combined. Finally, stir in the chocolate chips. Drop by rounded tablespoons + onto ungreased baking sheets and bake for 9 to 11 minutes. 
+ `; + + const response = await ai.models.generateContent({ + model: "gemini-2.5-flash", + contents: prompt, + config: { + responseMimeType: "application/json", + responseJsonSchema: zodToJsonSchema(recipeSchema), + }, + }); + + const recipe = recipeSchema.parse(JSON.parse(response.text)); + console.log(recipe); + +### Go + + package main + + import ( + "context" + "fmt" + "log" + + "google.golang.org/genai" + ) + + func main() { + ctx := context.Background() + client, err := genai.NewClient(ctx, nil) + if err != nil { + log.Fatal(err) + } + + prompt := ` + Please extract the recipe from the following text. + The user wants to make delicious chocolate chip cookies. + They need 2 and 1/4 cups of all-purpose flour, 1 teaspoon of baking soda, + 1 teaspoon of salt, 1 cup of unsalted butter (softened), 3/4 cup of granulated sugar, + 3/4 cup of packed brown sugar, 1 teaspoon of vanilla extract, and 2 large eggs. + For the best part, they'll need 2 cups of semisweet chocolate chips. + First, preheat the oven to 375°F (190°C). Then, in a small bowl, whisk together the flour, + baking soda, and salt. In a large bowl, cream together the butter, granulated sugar, and brown sugar + until light and fluffy. Beat in the vanilla and eggs, one at a time. Gradually beat in the dry + ingredients until just combined. Finally, stir in the chocolate chips. Drop by rounded tablespoons + onto ungreased baking sheets and bake for 9 to 11 minutes. 
+ ` + config := &genai.GenerateContentConfig{ + ResponseMIMEType: "application/json", + ResponseJsonSchema: map[string]any{ + "type": "object", + "properties": map[string]any{ + "recipe_name": map[string]any{ + "type": "string", + "description": "The name of the recipe.", + }, + "prep_time_minutes": map[string]any{ + "type": "integer", + "description": "Optional time in minutes to prepare the recipe.", + }, + "ingredients": map[string]any{ + "type": "array", + "items": map[string]any{ + "type": "object", + "properties": map[string]any{ + "name": map[string]any{ + "type": "string", + "description": "Name of the ingredient.", + }, + "quantity": map[string]any{ + "type": "string", + "description": "Quantity of the ingredient, including units.", + }, + }, + "required": []string{"name", "quantity"}, + }, + }, + "instructions": map[string]any{ + "type": "array", + "items": map[string]any{"type": "string"}, + }, + }, + "required": []string{"recipe_name", "ingredients", "instructions"}, + }, + } + + result, err := client.Models.GenerateContent( + ctx, + "gemini-2.5-flash", + genai.Text(prompt), + config, + ) + if err != nil { + log.Fatal(err) + } + fmt.Println(result.Text()) + } + +### REST + + curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent" \ + -H "x-goog-api-key: $GEMINI_API_KEY" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [{ + "parts":[ + { "text": "Please extract the recipe from the following text.\nThe user wants to make delicious chocolate chip cookies.\nThey need 2 and 1/4 cups of all-purpose flour, 1 teaspoon of baking soda,\n1 teaspoon of salt, 1 cup of unsalted butter (softened), 3/4 cup of granulated sugar,\n3/4 cup of packed brown sugar, 1 teaspoon of vanilla extract, and 2 large eggs.\nFor the best part, they will need 2 cups of semisweet chocolate chips.\nFirst, preheat the oven to 375°F (190°C). Then, in a small bowl, whisk together the flour,\nbaking soda, and salt. 
In a large bowl, cream together the butter, granulated sugar, and brown sugar\nuntil light and fluffy. Beat in the vanilla and eggs, one at a time. Gradually beat in the dry\ningredients until just combined. Finally, stir in the chocolate chips. Drop by rounded tablespoons\nonto ungreased baking sheets and bake for 9 to 11 minutes." } + ] + }], + "generationConfig": { + "responseMimeType": "application/json", + "responseJsonSchema": { + "type": "object", + "properties": { + "recipe_name": { + "type": "string", + "description": "The name of the recipe." + }, + "prep_time_minutes": { + "type": "integer", + "description": "Optional time in minutes to prepare the recipe." + }, + "ingredients": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { "type": "string", "description": "Name of the ingredient."}, + "quantity": { "type": "string", "description": "Quantity of the ingredient, including units."} + }, + "required": ["name", "quantity"] + } + }, + "instructions": { + "type": "array", + "items": { "type": "string" } + } + }, + "required": ["recipe_name", "ingredients", "instructions"] + } + } + }' + +**Example Response:** + + { + "recipe_name": "Delicious Chocolate Chip Cookies", + "ingredients": [ + { + "name": "all-purpose flour", + "quantity": "2 and 1/4 cups" + }, + { + "name": "baking soda", + "quantity": "1 teaspoon" + }, + { + "name": "salt", + "quantity": "1 teaspoon" + }, + { + "name": "unsalted butter (softened)", + "quantity": "1 cup" + }, + { + "name": "granulated sugar", + "quantity": "3/4 cup" + }, + { + "name": "packed brown sugar", + "quantity": "3/4 cup" + }, + { + "name": "vanilla extract", + "quantity": "1 teaspoon" + }, + { + "name": "large eggs", + "quantity": "2" + }, + { + "name": "semisweet chocolate chips", + "quantity": "2 cups" + } + ], + "instructions": [ + "Preheat the oven to 375°F (190°C).", + "In a small bowl, whisk together the flour, baking soda, and salt.", + "In a large bowl, cream together the 
butter, granulated sugar, and brown sugar until light and fluffy.", + "Beat in the vanilla and eggs, one at a time.", + "Gradually beat in the dry ingredients until just combined.", + "Stir in the chocolate chips.", + "Drop by rounded tablespoons onto ungreased baking sheets and bake for 9 to 11 minutes." + ] + } + +## Streaming + +You can stream structured outputs, which allows you to start processing the response as it's being generated, without having to wait for the entire output to be complete. This can improve the perceived performance of your application. + +The streamed chunks will be valid partial JSON strings, which can be concatenated to form the final, complete JSON object. + +### Python + + from google import genai + from pydantic import BaseModel, Field + from typing import Literal + + class Feedback(BaseModel): + sentiment: Literal["positive", "neutral", "negative"] + summary: str + + client = genai.Client() + prompt = "The new UI is incredibly intuitive and visually appealing. Great job. Add a very long summary to test streaming!" + + response_stream = client.models.generate_content_stream( + model="gemini-2.5-flash", + contents=prompt, + config={ + "response_mime_type": "application/json", + "response_json_schema": Feedback.model_json_schema(), + }, + ) + + for chunk in response_stream: + print(chunk.candidates[0].content.parts[0].text) + +### JavaScript + + import { GoogleGenAI } from "@google/genai"; + import { z } from "zod"; + import { zodToJsonSchema } from "zod-to-json-schema"; + + const ai = new GoogleGenAI({}); + const prompt = "The new UI is incredibly intuitive and visually appealing. Great job! 
Add a very long summary to test streaming!"; + + const feedbackSchema = z.object({ + sentiment: z.enum(["positive", "neutral", "negative"]), + summary: z.string(), + }); + + const stream = await ai.models.generateContentStream({ + model: "gemini-2.5-flash", + contents: prompt, + config: { + responseMimeType: "application/json", + responseJsonSchema: zodToJsonSchema(feedbackSchema), + }, + }); + + for await (const chunk of stream) { + console.log(chunk.candidates[0].content.parts[0].text) + } + +## Structured outputs with tools + +| **Preview:** This is a feature available only for the Gemini 3 series models,`gemini-3-pro-preview`and`gemini-3-flash-preview`. + +Gemini 3 lets you combine Structured Outputs with built-in tools, including[Grounding with Google Search](https://ai.google.dev/gemini-api/docs/google-search),[URL Context](https://ai.google.dev/gemini-api/docs/url-context), and[Code Execution](https://ai.google.dev/gemini-api/docs/code-execution). + +### Python + + from google import genai + from pydantic import BaseModel, Field + from typing import List + + class MatchResult(BaseModel): + winner: str = Field(description="The name of the winner.") + final_match_score: str = Field(description="The final match score.") + scorers: List[str] = Field(description="The name of the scorer.") + + client = genai.Client() + + response = client.models.generate_content( + model="gemini-3-pro-preview", + contents="Search for all details for the latest Euro.", + config={ + "tools": [ + {"google_search": {}}, + {"url_context": {}} + ], + "response_mime_type": "application/json", + "response_json_schema": MatchResult.model_json_schema(), + }, + ) + + result = MatchResult.model_validate_json(response.text) + print(result) + +### JavaScript + + import { GoogleGenAI } from "@google/genai"; + import { z } from "zod"; + import { zodToJsonSchema } from "zod-to-json-schema"; + + const ai = new GoogleGenAI({}); + + const matchSchema = z.object({ + winner: z.string().describe("The 
name of the winner."), + final_match_score: z.string().describe("The final score."), + scorers: z.array(z.string()).describe("The name of the scorer.") + }); + + async function run() { + const response = await ai.models.generateContent({ + model: "gemini-3-pro-preview", + contents: "Search for all details for the latest Euro.", + config: { + tools: [ + { googleSearch: {} }, + { urlContext: {} } + ], + responseMimeType: "application/json", + responseJsonSchema: zodToJsonSchema(matchSchema), + }, + }); + + const match = matchSchema.parse(JSON.parse(response.text)); + console.log(match); + } + + run(); + +### REST + + curl "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-preview:generateContent" \ + -H "x-goog-api-key: $GEMINI_API_KEY" \ + -H 'Content-Type: application/json' \ + -X POST \ + -d '{ + "contents": [{ + "parts": [{"text": "Search for all details for the latest Euro."}] + }], + "tools": [ + {"googleSearch": {}}, + {"urlContext": {}} + ], + "generationConfig": { + "responseMimeType": "application/json", + "responseJsonSchema": { + "type": "object", + "properties": { + "winner": {"type": "string", "description": "The name of the winner."}, + "final_match_score": {"type": "string", "description": "The final score."}, + "scorers": { + "type": "array", + "items": {"type": "string"}, + "description": "The name of the scorer." + } + }, + "required": ["winner", "final_match_score", "scorers"] + } + } + }' + +## JSON schema support + +To generate a JSON object, set the`response_mime_type`in the generation configuration to`application/json`and provide a`response_json_schema`. The schema must be a valid[JSON Schema](https://json-schema.org/)that describes the desired output format. + +The model will then generate a response that is a syntactically valid JSON string matching the provided schema. When using structured outputs, the model will produce outputs in the same order as the keys in the schema. 
+ +Gemini's structured output mode supports a subset of the[JSON Schema](https://json-schema.org)specification. + +The following values of`type`are supported: + +- **`string`**: For text. +- **`number`**: For floating-point numbers. +- **`integer`**: For whole numbers. +- **`boolean`**: For true/false values. +- **`object`**: For structured data with key-value pairs. +- **`array`**: For lists of items. +- **`null`** : To allow a property to be null, include`"null"`in the type array (e.g.,`{"type": ["string", "null"]}`). + +These descriptive properties help guide the model: + +- **`title`**: A short description of a property. +- **`description`**: A longer and more detailed description of a property. + +### Type-specific properties + +**For`object`values:** + +- **`properties`**: An object where each key is a property name and each value is a schema for that property. +- **`required`**: An array of strings, listing which properties are mandatory. +- **`additionalProperties`** : Controls whether properties not listed in`properties`are allowed. Can be a boolean or a schema. + +**For`string`values:** + +- **`enum`**: Lists a specific set of possible strings for classification tasks. +- **`format`** : Specifies a syntax for the string, such as`date-time`,`date`,`time`. + +**For`number`and`integer`values:** + +- **`enum`**: Lists a specific set of possible numeric values. +- **`minimum`**: The minimum inclusive value. +- **`maximum`**: The maximum inclusive value. + +**For`array`values:** + +- **`items`**: Defines the schema for all items in the array. +- **`prefixItems`**: Defines a list of schemas for the first N items, allowing for tuple-like structures. +- **`minItems`**: The minimum number of items in the array. +- **`maxItems`**: The maximum number of items in the array. 
+ +## Model support + +The following models support structured output: + +| Model | Structured Outputs | +|------------------------|--------------------| +| Gemini 3 Pro Preview | ✔️ | +| Gemini 3 Flash Preview | ✔️ | +| Gemini 2.5 Pro | ✔️ | +| Gemini 2.5 Flash | ✔️ | +| Gemini 2.5 Flash-Lite | ✔️ | +| Gemini 2.0 Flash | ✔️\* | +| Gemini 2.0 Flash-Lite | ✔️\* | + +*\* Note that Gemini 2.0 requires an explicit`propertyOrdering`list within the JSON input to define the preferred structure. You can find an example in this[cookbook](https://github.com/google-gemini/cookbook/blob/main/examples/Pdf_structured_outputs_on_invoices_and_forms.ipynb).* + +## Structured outputs vs. function calling + +Both structured outputs and function calling use JSON schemas, but they serve different purposes: + +| Feature | Primary Use Case | +|------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| **Structured Outputs** | **Formatting the final response to the user.** Use this when you want the model's*answer*to be in a specific format (e.g., extracting data from a document to save to a database). | +| **Function Calling** | **Taking action during the conversation.** Use this when the model needs to*ask you*to perform a task (e.g., "get current weather") before it can provide a final answer. | + +## Best practices + +- **Clear descriptions:** Use the`description`field in your schema to provide clear instructions to the model about what each property represents. This is crucial for guiding the model's output. +- **Strong typing:** Use specific types (`integer`,`string`,`enum`) whenever possible. If a parameter has a limited set of valid values, use an`enum`. +- **Prompt engineering:**Clearly state in your prompt what you want the model to do. For example, "Extract the following information from the text..." 
or "Classify this feedback according to the provided schema...". +- **Validation:**While structured output guarantees syntactically correct JSON, it does not guarantee the values are semantically correct. Always validate the final output in your application code before using it. +- **Error handling:**Implement robust error handling in your application to gracefully manage cases where the model's output, while schema-compliant, may not meet your business logic requirements. + +## Limitations + +- **Schema subset:**Not all features of the JSON Schema specification are supported. The model ignores unsupported properties. +- **Schema complexity:**The API may reject very large or deeply nested schemas. If you encounter errors, try simplifying your schema by shortening property names, reducing nesting, or limiting the number of constraints. \ No newline at end of file diff --git a/Source/WulaFallenEmpire/WulaAI_DevDocs/openai/Structured-outputs.md b/Source/WulaFallenEmpire/WulaAI_DevDocs/openai/Structured-outputs.md new file mode 100644 index 00000000..0bc38dab --- /dev/null +++ b/Source/WulaFallenEmpire/WulaAI_DevDocs/openai/Structured-outputs.md @@ -0,0 +1,1844 @@ +Structured model outputs +======================== + +Ensure text responses from the model adhere to a JSON schema you define. + +JSON is one of the most widely used formats in the world for applications to exchange data. + +Structured Outputs is a feature that ensures the model will always generate responses that adhere to your supplied [JSON Schema](https://json-schema.org/overview/what-is-jsonschema), so you don't need to worry about the model omitting a required key, or hallucinating an invalid enum value. + +Some benefits of Structured Outputs include: + +1. **Reliable type-safety:** No need to validate or retry incorrectly formatted responses +2. **Explicit refusals:** Safety-based model refusals are now programmatically detectable +3. 
**Simpler prompting:** No need for strongly worded prompts to achieve consistent formatting + +In addition to supporting JSON Schema in the REST API, the OpenAI SDKs for [Python](https://github.com/openai/openai-python/blob/main/helpers.md#structured-outputs-parsing-helpers) and [JavaScript](https://github.com/openai/openai-node/blob/master/helpers.md#structured-outputs-parsing-helpers) also make it easy to define object schemas using [Pydantic](https://docs.pydantic.dev/latest/) and [Zod](https://zod.dev/) respectively. Below, you can see how to extract information from unstructured text that conforms to a schema defined in code. + +Getting a structured response + +``` +import OpenAI from "openai"; +import { zodTextFormat } from "openai/helpers/zod"; +import { z } from "zod"; + +const openai = new OpenAI(); + +const CalendarEvent = z.object({ + name: z.string(), + date: z.string(), + participants: z.array(z.string()), +}); + +const response = await openai.responses.parse({ + model: "gpt-4o-2024-08-06", + input: [ + { role: "system", content: "Extract the event information." }, + { + role: "user", + content: "Alice and Bob are going to a science fair on Friday.", + }, + ], + text: { + format: zodTextFormat(CalendarEvent, "event"), + }, +}); + +const event = response.output_parsed; +``` + +``` +from openai import OpenAI +from pydantic import BaseModel + +client = OpenAI() + +class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + +response = client.responses.parse( + model="gpt-4o-2024-08-06", + input=[ + {"role": "system", "content": "Extract the event information."}, + { + "role": "user", + "content": "Alice and Bob are going to a science fair on Friday.", + }, + ], + text_format=CalendarEvent, +) + +event = response.output_parsed +``` + +### Supported models + +Structured Outputs is available in our [latest large language models](/docs/models), starting with GPT-4o. 
Older models like `gpt-4-turbo` and earlier may use [JSON mode](/docs/guides/structured-outputs#json-mode) instead.
+
+When to use Structured Outputs via function calling vs via text.format
+
+--------------------------------------------------------------------------
+
+Structured Outputs is available in two forms in the OpenAI API:
+
+1. When using [function calling](/docs/guides/function-calling)
+2. When using a `json_schema` response format
+
+Function calling is useful when you are building an application that bridges the models and functionality of your application.
+
+For example, you can give the model access to functions that query a database in order to build an AI assistant that can help users with their orders, or functions that can interact with the UI.
+
+Conversely, Structured Outputs via `response_format` are more suitable when you want to indicate a structured schema for use when the model responds to the user, rather than when the model calls a tool.
+
+For example, if you are building a math tutoring application, you might want the assistant to respond to your user using a specific JSON Schema so that you can generate a UI that displays different parts of the model's output in distinct ways.
+
+Put simply:
+
+* If you are connecting the model to tools, functions, data, etc. in your system, then you should use function calling. If you want to structure the model's output when it responds to the user, then you should use a structured `text.format`.
+
+The remainder of this guide will focus on non-function calling use cases in the Responses API. To learn more about how to use Structured Outputs with function calling, check out the
+
+[
+
+Function Calling
+
+](/docs/guides/function-calling#function-calling-with-structured-outputs)
+
+guide.
+
+### Structured Outputs vs JSON mode
+
+Structured Outputs is the evolution of [JSON mode](/docs/guides/structured-outputs#json-mode). 
While both ensure valid JSON is produced, only Structured Outputs ensure schema adherence. Both Structured Outputs and JSON mode are supported in the Responses API, Chat Completions API, Assistants API, Fine-tuning API and Batch API. + +We recommend always using Structured Outputs instead of JSON mode when possible. + +However, Structured Outputs with `response_format: {type: "json_schema", ...}` is only supported with the `gpt-4o-mini`, `gpt-4o-mini-2024-07-18`, and `gpt-4o-2024-08-06` model snapshots and later. + +||Structured Outputs|JSON Mode| +|---|---|---| +|Outputs valid JSON|Yes|Yes| +|Adheres to schema|Yes (see supported schemas)|No| +|Compatible models|gpt-4o-mini, gpt-4o-2024-08-06, and later|gpt-3.5-turbo, gpt-4-* and gpt-4o-* models| +|Enabling|text: { format: { type: "json_schema", "strict": true, "schema": ... } }|text: { format: { type: "json_object" } }| + +Examples +-------- + +Chain of thought + +### Chain of thought + +You can ask the model to output an answer in a structured, step-by-step way, to guide the user through the solution. + +Structured Outputs for chain-of-thought math tutoring + +``` +import OpenAI from "openai"; +import { zodTextFormat } from "openai/helpers/zod"; +import { z } from "zod"; + +const openai = new OpenAI(); + +const Step = z.object({ + explanation: z.string(), + output: z.string(), +}); + +const MathReasoning = z.object({ + steps: z.array(Step), + final_answer: z.string(), +}); + +const response = await openai.responses.parse({ + model: "gpt-4o-2024-08-06", + input: [ + { + role: "system", + content: + "You are a helpful math tutor. 
Guide the user through the solution step by step.", + }, + { role: "user", content: "how can I solve 8x + 7 = -23" }, + ], + text: { + format: zodTextFormat(MathReasoning, "math_reasoning"), + }, +}); + +const math_reasoning = response.output_parsed; +``` + +``` +from openai import OpenAI +from pydantic import BaseModel + +client = OpenAI() + +class Step(BaseModel): + explanation: str + output: str + +class MathReasoning(BaseModel): + steps: list[Step] + final_answer: str + +response = client.responses.parse( + model="gpt-4o-2024-08-06", + input=[ + { + "role": "system", + "content": "You are a helpful math tutor. Guide the user through the solution step by step.", + }, + {"role": "user", "content": "how can I solve 8x + 7 = -23"}, + ], + text_format=MathReasoning, +) + +math_reasoning = response.output_parsed +``` + +``` +curl https://api.openai.com/v1/responses \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-2024-08-06", + "input": [ + { + "role": "system", + "content": "You are a helpful math tutor. Guide the user through the solution step by step." 
+ }, + { + "role": "user", + "content": "how can I solve 8x + 7 = -23" + } + ], + "text": { + "format": { + "type": "json_schema", + "name": "math_reasoning", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": { "type": "string" }, + "output": { "type": "string" } + }, + "required": ["explanation", "output"], + "additionalProperties": false + } + }, + "final_answer": { "type": "string" } + }, + "required": ["steps", "final_answer"], + "additionalProperties": false + }, + "strict": true + } + } + }' +``` + +#### Example response + +``` +{ + "steps": [ + { + "explanation": "Start with the equation 8x + 7 = -23.", + "output": "8x + 7 = -23" + }, + { + "explanation": "Subtract 7 from both sides to isolate the term with the variable.", + "output": "8x = -23 - 7" + }, + { + "explanation": "Simplify the right side of the equation.", + "output": "8x = -30" + }, + { + "explanation": "Divide both sides by 8 to solve for x.", + "output": "x = -30 / 8" + }, + { + "explanation": "Simplify the fraction.", + "output": "x = -15 / 4" + } + ], + "final_answer": "x = -15 / 4" +} +``` + +Structured data extraction + +### Structured data extraction + +You can define structured fields to extract from unstructured input data, such as research papers. + +Extracting data from research papers using Structured Outputs + +``` +import OpenAI from "openai"; +import { zodTextFormat } from "openai/helpers/zod"; +import { z } from "zod"; + +const openai = new OpenAI(); + +const ResearchPaperExtraction = z.object({ + title: z.string(), + authors: z.array(z.string()), + abstract: z.string(), + keywords: z.array(z.string()), +}); + +const response = await openai.responses.parse({ + model: "gpt-4o-2024-08-06", + input: [ + { + role: "system", + content: + "You are an expert at structured data extraction. 
You will be given unstructured text from a research paper and should convert it into the given structure.", + }, + { role: "user", content: "..." }, + ], + text: { + format: zodTextFormat(ResearchPaperExtraction, "research_paper_extraction"), + }, +}); + +const research_paper = response.output_parsed; +``` + +``` +from openai import OpenAI +from pydantic import BaseModel + +client = OpenAI() + +class ResearchPaperExtraction(BaseModel): + title: str + authors: list[str] + abstract: str + keywords: list[str] + +response = client.responses.parse( + model="gpt-4o-2024-08-06", + input=[ + { + "role": "system", + "content": "You are an expert at structured data extraction. You will be given unstructured text from a research paper and should convert it into the given structure.", + }, + {"role": "user", "content": "..."}, + ], + text_format=ResearchPaperExtraction, +) + +research_paper = response.output_parsed +``` + +``` +curl https://api.openai.com/v1/responses \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-2024-08-06", + "input": [ + { + "role": "system", + "content": "You are an expert at structured data extraction. You will be given unstructured text from a research paper and should convert it into the given structure." + }, + { + "role": "user", + "content": "..." + } + ], + "text": { + "format": { + "type": "json_schema", + "name": "research_paper_extraction", + "schema": { + "type": "object", + "properties": { + "title": { "type": "string" }, + "authors": { + "type": "array", + "items": { "type": "string" } + }, + "abstract": { "type": "string" }, + "keywords": { + "type": "array", + "items": { "type": "string" } + } + }, + "required": ["title", "authors", "abstract", "keywords"], + "additionalProperties": false + }, + "strict": true + } + } + }' +``` + +#### Example response + +``` +{ + "title": "Application of Quantum Algorithms in Interstellar Navigation: A New Frontier", + "authors": [ + "Dr. 
Stella Voyager", + "Dr. Nova Star", + "Dr. Lyra Hunter" + ], + "abstract": "This paper investigates the utilization of quantum algorithms to improve interstellar navigation systems. By leveraging quantum superposition and entanglement, our proposed navigation system can calculate optimal travel paths through space-time anomalies more efficiently than classical methods. Experimental simulations suggest a significant reduction in travel time and fuel consumption for interstellar missions.", + "keywords": [ + "Quantum algorithms", + "interstellar navigation", + "space-time anomalies", + "quantum superposition", + "quantum entanglement", + "space travel" + ] +} +``` + +UI generation + +### UI Generation + +You can generate valid HTML by representing it as recursive data structures with constraints, like enums. + +Generating HTML using Structured Outputs + +``` +import OpenAI from "openai"; +import { zodTextFormat } from "openai/helpers/zod"; +import { z } from "zod"; + +const openai = new OpenAI(); + +const UI = z.lazy(() => + z.object({ + type: z.enum(["div", "button", "header", "section", "field", "form"]), + label: z.string(), + children: z.array(UI), + attributes: z.array( + z.object({ + name: z.string(), + value: z.string(), + }) + ), + }) +); + +const response = await openai.responses.parse({ + model: "gpt-4o-2024-08-06", + input: [ + { + role: "system", + content: "You are a UI generator AI. 
Convert the user input into a UI.", + }, + { + role: "user", + content: "Make a User Profile Form", + }, + ], + text: { + format: zodTextFormat(UI, "ui"), + }, +}); + +const ui = response.output_parsed; +``` + +``` +from enum import Enum +from typing import List + +from openai import OpenAI +from pydantic import BaseModel + +client = OpenAI() + +class UIType(str, Enum): + div = "div" + button = "button" + header = "header" + section = "section" + field = "field" + form = "form" + +class Attribute(BaseModel): + name: str + value: str + +class UI(BaseModel): + type: UIType + label: str + children: List["UI"] + attributes: List[Attribute] + +UI.model_rebuild() # This is required to enable recursive types + +class Response(BaseModel): + ui: UI + +response = client.responses.parse( + model="gpt-4o-2024-08-06", + input=[ + { + "role": "system", + "content": "You are a UI generator AI. Convert the user input into a UI.", + }, + {"role": "user", "content": "Make a User Profile Form"}, + ], + text_format=Response, +) + +ui = response.output_parsed +``` + +``` +curl https://api.openai.com/v1/responses \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-2024-08-06", + "input": [ + { + "role": "system", + "content": "You are a UI generator AI. Convert the user input into a UI." 
+ }, + { + "role": "user", + "content": "Make a User Profile Form" + } + ], + "text": { + "format": { + "type": "json_schema", + "name": "ui", + "description": "Dynamically generated UI", + "schema": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The type of the UI component", + "enum": ["div", "button", "header", "section", "field", "form"] + }, + "label": { + "type": "string", + "description": "The label of the UI component, used for buttons or form fields" + }, + "children": { + "type": "array", + "description": "Nested UI components", + "items": {"$ref": "#"} + }, + "attributes": { + "type": "array", + "description": "Arbitrary attributes for the UI component, suitable for any element", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the attribute, for example onClick or className" + }, + "value": { + "type": "string", + "description": "The value of the attribute" + } + }, + "required": ["name", "value"], + "additionalProperties": false + } + } + }, + "required": ["type", "label", "children", "attributes"], + "additionalProperties": false + }, + "strict": true + } + } + }' +``` + +#### Example response + +``` +{ + "type": "form", + "label": "User Profile Form", + "children": [ + { + "type": "div", + "label": "", + "children": [ + { + "type": "field", + "label": "First Name", + "children": [], + "attributes": [ + { + "name": "type", + "value": "text" + }, + { + "name": "name", + "value": "firstName" + }, + { + "name": "placeholder", + "value": "Enter your first name" + } + ] + }, + { + "type": "field", + "label": "Last Name", + "children": [], + "attributes": [ + { + "name": "type", + "value": "text" + }, + { + "name": "name", + "value": "lastName" + }, + { + "name": "placeholder", + "value": "Enter your last name" + } + ] + } + ], + "attributes": [] + }, + { + "type": "button", + "label": "Submit", + "children": [], + "attributes": [ + { + "name": "type", 
+ "value": "submit" + } + ] + } + ], + "attributes": [ + { + "name": "method", + "value": "post" + }, + { + "name": "action", + "value": "/submit-profile" + } + ] +} +``` + +Moderation + +### Moderation + +You can classify inputs on multiple categories, which is a common way of doing moderation. + +Moderation using Structured Outputs + +``` +import OpenAI from "openai"; +import { zodTextFormat } from "openai/helpers/zod"; +import { z } from "zod"; + +const openai = new OpenAI(); + +const ContentCompliance = z.object({ + is_violating: z.boolean(), + category: z.enum(["violence", "sexual", "self_harm"]).nullable(), + explanation_if_violating: z.string().nullable(), +}); + +const response = await openai.responses.parse({ + model: "gpt-4o-2024-08-06", + input: [ + { + "role": "system", + "content": "Determine if the user input violates specific guidelines and explain if they do." + }, + { + "role": "user", + "content": "How do I prepare for a job interview?" + } + ], + text: { + format: zodTextFormat(ContentCompliance, "content_compliance"), + }, +}); + +const compliance = response.output_parsed; +``` + +``` +from enum import Enum +from typing import Optional + +from openai import OpenAI +from pydantic import BaseModel + +client = OpenAI() + +class Category(str, Enum): + violence = "violence" + sexual = "sexual" + self_harm = "self_harm" + +class ContentCompliance(BaseModel): + is_violating: bool + category: Optional[Category] + explanation_if_violating: Optional[str] + +response = client.responses.parse( + model="gpt-4o-2024-08-06", + input=[ + { + "role": "system", + "content": "Determine if the user input violates specific guidelines and explain if they do.", + }, + {"role": "user", "content": "How do I prepare for a job interview?"}, + ], + text_format=ContentCompliance, +) + +compliance = response.output_parsed +``` + +``` +curl https://api.openai.com/v1/responses \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + 
"model": "gpt-4o-2024-08-06", + "input": [ + { + "role": "system", + "content": "Determine if the user input violates specific guidelines and explain if they do." + }, + { + "role": "user", + "content": "How do I prepare for a job interview?" + } + ], + "text": { + "format": { + "type": "json_schema", + "name": "content_compliance", + "description": "Determines if content is violating specific moderation rules", + "schema": { + "type": "object", + "properties": { + "is_violating": { + "type": "boolean", + "description": "Indicates if the content is violating guidelines" + }, + "category": { + "type": ["string", "null"], + "description": "Type of violation, if the content is violating guidelines. Null otherwise.", + "enum": ["violence", "sexual", "self_harm"] + }, + "explanation_if_violating": { + "type": ["string", "null"], + "description": "Explanation of why the content is violating" + } + }, + "required": ["is_violating", "category", "explanation_if_violating"], + "additionalProperties": false + }, + "strict": true + } + } + }' +``` + +#### Example response + +``` +{ + "is_violating": false, + "category": null, + "explanation_if_violating": null +} +``` + +How to use Structured Outputs with text.format +---------------------------------------------- + +Step 1: Define your schema + +First you must design the JSON Schema that the model should be constrained to follow. See the [examples](/docs/guides/structured-outputs#examples) at the top of this guide for reference. + +While Structured Outputs supports much of JSON Schema, some features are unavailable either for performance or technical reasons. See [here](/docs/guides/structured-outputs#supported-schemas) for more details. 
+ +#### Tips for your JSON Schema + +To maximize the quality of model generations, we recommend the following: + +* Name keys clearly and intuitively +* Create clear titles and descriptions for important keys in your structure +* Create and use evals to determine the structure that works best for your use case + +Step 2: Supply your schema in the API call + +To use Structured Outputs, simply specify + +``` +text: { format: { type: "json_schema", "strict": true, "schema": … } } +``` + +For example: + +``` +response = client.responses.create( + model="gpt-4o-2024-08-06", + input=[ + {"role": "system", "content": "You are a helpful math tutor. Guide the user through the solution step by step."}, + {"role": "user", "content": "how can I solve 8x + 7 = -23"} + ], + text={ + "format": { + "type": "json_schema", + "name": "math_response", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": {"type": "string"}, + "output": {"type": "string"} + }, + "required": ["explanation", "output"], + "additionalProperties": False + } + }, + "final_answer": {"type": "string"} + }, + "required": ["steps", "final_answer"], + "additionalProperties": False + }, + "strict": True + } + } +) + +print(response.output_text) +``` + +``` +const response = await openai.responses.create({ + model: "gpt-4o-2024-08-06", + input: [ + { role: "system", content: "You are a helpful math tutor. Guide the user through the solution step by step." 
}, + { role: "user", content: "how can I solve 8x + 7 = -23" } + ], + text: { + format: { + type: "json_schema", + name: "math_response", + schema: { + type: "object", + properties: { + steps: { + type: "array", + items: { + type: "object", + properties: { + explanation: { type: "string" }, + output: { type: "string" } + }, + required: ["explanation", "output"], + additionalProperties: false + } + }, + final_answer: { type: "string" } + }, + required: ["steps", "final_answer"], + additionalProperties: false + }, + strict: true + } + } +}); + +console.log(response.output_text); +``` + +``` +curl https://api.openai.com/v1/responses \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o-2024-08-06", + "input": [ + { + "role": "system", + "content": "You are a helpful math tutor. Guide the user through the solution step by step." + }, + { + "role": "user", + "content": "how can I solve 8x + 7 = -23" + } + ], + "text": { + "format": { + "type": "json_schema", + "name": "math_response", + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": { "type": "string" }, + "output": { "type": "string" } + }, + "required": ["explanation", "output"], + "additionalProperties": false + } + }, + "final_answer": { "type": "string" } + }, + "required": ["steps", "final_answer"], + "additionalProperties": false + }, + "strict": true + } + } + }' +``` + +**Note:** the first request you make with any schema will have additional latency as our API processes the schema, but subsequent requests with the same schema will not have additional latency. + +Step 3: Handle edge cases + +In some cases, the model might not generate a valid response that matches the provided JSON schema. 
+ +This can happen in the case of a refusal, if the model refuses to answer for safety reasons, or if for example you reach a max tokens limit and the response is incomplete. + +``` +try { + const response = await openai.responses.create({ + model: "gpt-4o-2024-08-06", + input: [{ + role: "system", + content: "You are a helpful math tutor. Guide the user through the solution step by step.", + }, + { + role: "user", + content: "how can I solve 8x + 7 = -23" + }, + ], + max_output_tokens: 50, + text: { + format: { + type: "json_schema", + name: "math_response", + schema: { + type: "object", + properties: { + steps: { + type: "array", + items: { + type: "object", + properties: { + explanation: { + type: "string" + }, + output: { + type: "string" + }, + }, + required: ["explanation", "output"], + additionalProperties: false, + }, + }, + final_answer: { + type: "string" + }, + }, + required: ["steps", "final_answer"], + additionalProperties: false, + }, + strict: true, + }, + } + }); + + if (response.status === "incomplete" && response.incomplete_details.reason === "max_output_tokens") { + // Handle the case where the model did not return a complete response + throw new Error("Incomplete response"); + } + + const math_response = response.output[0].content[0]; + + if (math_response.type === "refusal") { + // handle refusal + console.log(math_response.refusal); + } else if (math_response.type === "output_text") { + console.log(math_response.text); + } else { + throw new Error("No response content"); + } +} catch (e) { + // Handle edge cases + console.error(e); +} +``` + +``` +try: + response = client.responses.create( + model="gpt-4o-2024-08-06", + input=[ + { + "role": "system", + "content": "You are a helpful math tutor. 
Guide the user through the solution step by step.", + }, + {"role": "user", "content": "how can I solve 8x + 7 = -23"}, + ], + text={ + "format": { + "type": "json_schema", + "name": "math_response", + "strict": True, + "schema": { + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": {"type": "string"}, + "output": {"type": "string"}, + }, + "required": ["explanation", "output"], + "additionalProperties": False, + }, + }, + "final_answer": {"type": "string"}, + }, + "required": ["steps", "final_answer"], + "additionalProperties": False, + }, + "strict": True, + }, + }, + ) +except Exception as e: + # handle errors like finish_reason, refusal, content_filter, etc. + pass +``` + +### + +Refusals with Structured Outputs + +When using Structured Outputs with user-generated input, OpenAI models may occasionally refuse to fulfill the request for safety reasons. Since a refusal does not necessarily follow the schema you have supplied in `response_format`, the API response will include a new field called `refusal` to indicate that the model refused to fulfill the request. + +When the `refusal` property appears in your output object, you might present the refusal in your UI, or include conditional logic in code that consumes the response to handle the case of a refused request. + +``` +class Step(BaseModel): + explanation: str + output: str + +class MathReasoning(BaseModel): + steps: list[Step] + final_answer: str + +completion = client.chat.completions.parse( + model="gpt-4o-2024-08-06", + messages=[ + {"role": "system", "content": "You are a helpful math tutor. 
Guide the user through the solution step by step."}, + {"role": "user", "content": "how can I solve 8x + 7 = -23"}, + ], + response_format=MathReasoning, +) + +math_reasoning = completion.choices[0].message + +# If the model refuses to respond, you will get a refusal message + +if math_reasoning.refusal: + print(math_reasoning.refusal) +else: + print(math_reasoning.parsed) +``` + +``` +const Step = z.object({ + explanation: z.string(), + output: z.string(), +}); + +const MathReasoning = z.object({ + steps: z.array(Step), + final_answer: z.string(), +}); + +const completion = await openai.chat.completions.parse({ + model: "gpt-4o-2024-08-06", + messages: [ + { role: "system", content: "You are a helpful math tutor. Guide the user through the solution step by step." }, + { role: "user", content: "how can I solve 8x + 7 = -23" }, + ], + response_format: zodResponseFormat(MathReasoning, "math_reasoning"), +}); + +const math_reasoning = completion.choices[0].message + +// If the model refuses to respond, you will get a refusal message +if (math_reasoning.refusal) { + console.log(math_reasoning.refusal); +} else { + console.log(math_reasoning.parsed); +} +``` + +The API response from a refusal will look something like this: + +``` +{ + "id": "resp_1234567890", + "object": "response", + "created_at": 1721596428, + "status": "completed", + "error": null, + "incomplete_details": null, + "input": [], + "instructions": null, + "max_output_tokens": null, + "model": "gpt-4o-2024-08-06", + "output": [{ + "id": "msg_1234567890", + "type": "message", + "role": "assistant", + "content": [ + { + "type": "refusal", + "refusal": "I'm sorry, I cannot assist with that request." 
+ } + ] + }], + "usage": { + "input_tokens": 81, + "output_tokens": 11, + "total_tokens": 92, + "output_tokens_details": { + "reasoning_tokens": 0, + } + }, +} +``` + +### + +Tips and best practices + +#### Handling user-generated input + +If your application is using user-generated input, make sure your prompt includes instructions on how to handle situations where the input cannot result in a valid response. + +The model will always try to adhere to the provided schema, which can result in hallucinations if the input is completely unrelated to the schema. + +You could include language in your prompt to specify that you want to return empty parameters, or a specific sentence, if the model detects that the input is incompatible with the task. + +#### Handling mistakes + +Structured Outputs can still contain mistakes. If you see mistakes, try adjusting your instructions, providing examples in the system instructions, or splitting tasks into simpler subtasks. Refer to the [prompt engineering guide](/docs/guides/prompt-engineering) for more guidance on how to tweak your inputs. + +#### Avoid JSON schema divergence + +To prevent your JSON Schema and corresponding types in your programming language from diverging, we strongly recommend using the native Pydantic/zod sdk support. + +If you prefer to specify the JSON schema directly, you could add CI rules that flag when either the JSON schema or underlying data objects are edited, or add a CI step that auto-generates the JSON Schema from type definitions (or vice-versa). + +Streaming +--------- + +You can use streaming to process model responses or function call arguments as they are being generated, and parse them as structured data. + +That way, you don't have to wait for the entire response to complete before handling it. This is particularly useful if you would like to display JSON fields one by one, or handle function call arguments as soon as they are available. 
+ +We recommend relying on the SDKs to handle streaming with Structured Outputs. + +``` +from typing import List + +from openai import OpenAI +from pydantic import BaseModel + +class EntitiesModel(BaseModel): + attributes: List[str] + colors: List[str] + animals: List[str] + +client = OpenAI() + +with client.responses.stream( + model="gpt-4.1", + input=[ + {"role": "system", "content": "Extract entities from the input text"}, + { + "role": "user", + "content": "The quick brown fox jumps over the lazy dog with piercing blue eyes", + }, + ], + text_format=EntitiesModel, +) as stream: + for event in stream: + if event.type == "response.refusal.delta": + print(event.delta, end="") + elif event.type == "response.output_text.delta": + print(event.delta, end="") + elif event.type == "response.error": + print(event.error, end="") + elif event.type == "response.completed": + print("Completed") + # print(event.response.output) + + final_response = stream.get_final_response() + print(final_response) +``` + +``` +import { OpenAI } from "openai"; +import { zodTextFormat } from "openai/helpers/zod"; +import { z } from "zod"; + +const EntitiesSchema = z.object({ + attributes: z.array(z.string()), + colors: z.array(z.string()), + animals: z.array(z.string()), +}); + +const openai = new OpenAI(); +const stream = openai.responses + .stream({ + model: "gpt-4.1", + input: [ + { role: "user", content: "What's the weather like in Paris today?" 
}, + ], + text: { + format: zodTextFormat(EntitiesSchema, "entities"), + }, + }) + .on("response.refusal.delta", (event) => { + process.stdout.write(event.delta); + }) + .on("response.output_text.delta", (event) => { + process.stdout.write(event.delta); + }) + .on("response.output_text.done", () => { + process.stdout.write("\n"); + }) + .on("response.error", (event) => { + console.error(event.error); + }); + +const result = await stream.finalResponse(); + +console.log(result); +``` + +Supported schemas +----------------- + +Structured Outputs supports a subset of the [JSON Schema](https://json-schema.org/docs) language. + +#### Supported types + +The following types are supported for Structured Outputs: + +* String +* Number +* Boolean +* Integer +* Object +* Array +* Enum +* anyOf + +#### Supported properties + +In addition to specifying the type of a property, you can specify a selection of additional constraints: + +**Supported `string` properties:** + +* `pattern` — A regular expression that the string must match. +* `format` — Predefined formats for strings. Currently supported: + * `date-time` + * `time` + * `date` + * `duration` + * `email` + * `hostname` + * `ipv4` + * `ipv6` + * `uuid` + +**Supported `number` properties:** + +* `multipleOf` — The number must be a multiple of this value. +* `maximum` — The number must be less than or equal to this value. +* `exclusiveMaximum` — The number must be less than this value. +* `minimum` — The number must be greater than or equal to this value. +* `exclusiveMinimum` — The number must be greater than this value. + +**Supported `array` properties:** + +* `minItems` — The array must have at least this many items. +* `maxItems` — The array must have at most this many items. 
+ +Here are some examples on how you can use these type restrictions: + +String Restrictions + +``` +{ + "name": "user_data", + "strict": true, + "schema": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the user" + }, + "username": { + "type": "string", + "description": "The username of the user. Must start with @", + "pattern": "^@[a-zA-Z0-9_]+$" + }, + "email": { + "type": "string", + "description": "The email of the user", + "format": "email" + } + }, + "additionalProperties": false, + "required": [ + "name", "username", "email" + ] + } +} +``` + +Number Restrictions + +``` +{ + "name": "weather_data", + "strict": true, + "schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location to get the weather for" + }, + "unit": { + "type": ["string", "null"], + "description": "The unit to return the temperature in", + "enum": ["F", "C"] + }, + "value": { + "type": "number", + "description": "The actual temperature value in the location", + "minimum": -130, + "maximum": 130 + } + }, + "additionalProperties": false, + "required": [ + "location", "unit", "value" + ] + } +} +``` + +Note these constraints are [not yet supported for fine-tuned models](/docs/guides/structured-outputs#some-type-specific-keywords-are-not-yet-supported). + +#### Root objects must not be `anyOf` and must be an object + +Note that the root level object of a schema must be an object, and not use `anyOf`. A pattern that appears in Zod (as one example) is using a discriminated union, which produces an `anyOf` at the top level. So code such as the following won't work: + +``` +import { z } from 'zod'; +import { zodResponseFormat } from 'openai/helpers/zod'; + +const BaseResponseSchema = z.object({/* ... */}); +const UnsuccessfulResponseSchema = z.object({/* ... 
*/}); + +const finalSchema = z.discriminatedUnion('status', [ +BaseResponseSchema, +UnsuccessfulResponseSchema, +]); + +// Invalid JSON Schema for Structured Outputs +const json = zodResponseFormat(finalSchema, 'final_schema'); +``` + +#### All fields must be `required` + +To use Structured Outputs, all fields or function parameters must be specified as `required`. + +``` +{ + "name": "get_weather", + "description": "Fetches the weather in the given location", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location to get the weather for" + }, + "unit": { + "type": "string", + "description": "The unit to return the temperature in", + "enum": ["F", "C"] + } + }, + "additionalProperties": false, + "required": ["location", "unit"] + } +} +``` + +Although all fields must be required (and the model will return a value for each parameter), it is possible to emulate an optional parameter by using a union type with `null`. + +``` +{ + "name": "get_weather", + "description": "Fetches the weather in the given location", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location to get the weather for" + }, + "unit": { + "type": ["string", "null"], + "description": "The unit to return the temperature in", + "enum": ["F", "C"] + } + }, + "additionalProperties": false, + "required": [ + "location", "unit" + ] + } +} +``` + +#### Objects have limitations on nesting depth and size + +A schema may have up to 5000 object properties total, with up to 10 levels of nesting. + +#### Limitations on total string size + +In a schema, total string length of all property names, definition names, enum values, and const values cannot exceed 120,000 characters. + +#### Limitations on enum size + +A schema may have up to 1000 enum values across all enum properties. 
+ +For a single enum property with string values, the total string length of all enum values cannot exceed 15,000 characters when there are more than 250 enum values. + +#### `additionalProperties: false` must always be set in objects + +`additionalProperties` controls whether it is allowable for an object to contain additional keys / values that were not defined in the JSON Schema. + +Structured Outputs only supports generating specified keys / values, so we require developers to set `additionalProperties: false` to opt into Structured Outputs. + +``` +{ + "name": "get_weather", + "description": "Fetches the weather in the given location", + "strict": true, + "schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The location to get the weather for" + }, + "unit": { + "type": "string", + "description": "The unit to return the temperature in", + "enum": ["F", "C"] + } + }, + "additionalProperties": false, + "required": [ + "location", "unit" + ] + } +} +``` + +#### Key ordering + +When using Structured Outputs, outputs will be produced in the same order as the ordering of keys in the schema. + +#### Some type-specific keywords are not yet supported + +* **Composition:** `allOf`, `not`, `dependentRequired`, `dependentSchemas`, `if`, `then`, `else` + +For fine-tuned models, we additionally do not support the following: + +* **For strings:** `minLength`, `maxLength`, `pattern`, `format` +* **For numbers:** `minimum`, `maximum`, `multipleOf` +* **For objects:** `patternProperties` +* **For arrays:** `minItems`, `maxItems` + +If you turn on Structured Outputs by supplying `strict: true` and call the API with an unsupported JSON Schema, you will receive an error. 
+ +#### For `anyOf`, the nested schemas must each be a valid JSON Schema per this subset + +Here's an example supported anyOf schema: + +``` +{ + "type": "object", + "properties": { + "item": { + "anyOf": [ + { + "type": "object", + "description": "The user object to insert into the database", + "properties": { + "name": { + "type": "string", + "description": "The name of the user" + }, + "age": { + "type": "number", + "description": "The age of the user" + } + }, + "additionalProperties": false, + "required": [ + "name", + "age" + ] + }, + { + "type": "object", + "description": "The address object to insert into the database", + "properties": { + "number": { + "type": "string", + "description": "The number of the address. Eg. for 123 main st, this would be 123" + }, + "street": { + "type": "string", + "description": "The street name. Eg. for 123 main st, this would be main st" + }, + "city": { + "type": "string", + "description": "The city of the address" + } + }, + "additionalProperties": false, + "required": [ + "number", + "street", + "city" + ] + } + ] + } + }, + "additionalProperties": false, + "required": [ + "item" + ] +} +``` + +#### Definitions are supported + +You can use definitions to define subschemas which are referenced throughout your schema. The following is a simple example. + +``` +{ + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "$ref": "#/$defs/step" + } + }, + "final_answer": { + "type": "string" + } + }, + "$defs": { + "step": { + "type": "object", + "properties": { + "explanation": { + "type": "string" + }, + "output": { + "type": "string" + } + }, + "required": [ + "explanation", + "output" + ], + "additionalProperties": false + } + }, + "required": [ + "steps", + "final_answer" + ], + "additionalProperties": false +} +``` + +#### Recursive schemas are supported + +Sample recursive schema using `#` to indicate root recursion. 
+ +``` +{ + "name": "ui", + "description": "Dynamically generated UI", + "strict": true, + "schema": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "The type of the UI component", + "enum": ["div", "button", "header", "section", "field", "form"] + }, + "label": { + "type": "string", + "description": "The label of the UI component, used for buttons or form fields" + }, + "children": { + "type": "array", + "description": "Nested UI components", + "items": { + "$ref": "#" + } + }, + "attributes": { + "type": "array", + "description": "Arbitrary attributes for the UI component, suitable for any element", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "The name of the attribute, for example onClick or className" + }, + "value": { + "type": "string", + "description": "The value of the attribute" + } + }, + "additionalProperties": false, + "required": ["name", "value"] + } + } + }, + "required": ["type", "label", "children", "attributes"], + "additionalProperties": false + } +} +``` + +Sample recursive schema using explicit recursion: + +``` +{ + "type": "object", + "properties": { + "linked_list": { + "$ref": "#/$defs/linked_list_node" + } + }, + "$defs": { + "linked_list_node": { + "type": "object", + "properties": { + "value": { + "type": "number" + }, + "next": { + "anyOf": [ + { + "$ref": "#/$defs/linked_list_node" + }, + { + "type": "null" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "next", + "value" + ] + } + }, + "additionalProperties": false, + "required": [ + "linked_list" + ] +} +``` + +JSON mode +--------- + +JSON mode is a more basic version of the Structured Outputs feature. While JSON mode ensures that model output is valid JSON, Structured Outputs reliably matches the model's output to the schema you specify. We recommend you use Structured Outputs if it is supported for your use case. 
+ +When JSON mode is turned on, the model's output is ensured to be valid JSON, except for in some edge cases that you should detect and handle appropriately. + +To turn on JSON mode with the Responses API you can set the `text.format` to `{ "type": "json_object" }`. If you are using function calling, JSON mode is always turned on. + +Important notes: + +* When using JSON mode, you must always instruct the model to produce JSON via some message in the conversation, for example via your system message. If you don't include an explicit instruction to generate JSON, the model may generate an unending stream of whitespace and the request may run continually until it reaches the token limit. To help ensure you don't forget, the API will throw an error if the string "JSON" does not appear somewhere in the context. +* JSON mode will not guarantee the output matches any specific schema, only that it is valid and parses without errors. You should use Structured Outputs to ensure it matches your schema, or if that is not possible, you should use a validation library and potentially retries to ensure that the output matches your desired schema. +* Your application must detect and handle the edge cases that can result in the model output not being a complete JSON object (see below) + +Handling edge cases + +``` +const we_did_not_specify_stop_tokens = true; + +try { + const response = await openai.responses.create({ + model: "gpt-3.5-turbo-0125", + input: [ + { + role: "system", + content: "You are a helpful assistant designed to output JSON.", + }, + { role: "user", content: "Who won the world series in 2020? 
Please respond in the format {winner: ...}" }, + ], + text: { format: { type: "json_object" } }, + }); + + // Check if the conversation was too long for the context window, resulting in incomplete JSON + if (response.status === "incomplete" && response.incomplete_details.reason === "max_output_tokens") { + // your code should handle this error case + } + + // Check if the OpenAI safety system refused the request and generated a refusal instead + if (response.output[0].content[0].type === "refusal") { + // your code should handle this error case + // In this case, the .content field will contain the explanation (if any) that the model generated for why it is refusing + console.log(response.output[0].content[0].refusal) + } + + // Check if the model's output included restricted content, so the generation of JSON was halted and may be partial + if (response.status === "incomplete" && response.incomplete_details.reason === "content_filter") { + // your code should handle this error case + } + + if (response.status === "completed") { + // In this case the model has either successfully finished generating the JSON object according to your schema, or the model generated one of the tokens you provided as a "stop token" + + if (we_did_not_specify_stop_tokens) { + // If you didn't specify any stop tokens, then the generation is complete and the content key will contain the serialized JSON object + // This will parse successfully and should now contain {"winner": "Los Angeles Dodgers"} + console.log(JSON.parse(response.output_text)) + } else { + // Check if the response.output_text ends with one of your stop tokens and handle appropriately + } + } +} catch (e) { + // Your code should handle errors here, for example a network error calling the API + console.error(e) +} +``` + +``` +we_did_not_specify_stop_tokens = True + +try: + response = client.responses.create( + model="gpt-3.5-turbo-0125", + input=[ + {"role": "system", "content": "You are a helpful assistant designed to 
output JSON."}, + {"role": "user", "content": "Who won the world series in 2020? Please respond in the format {winner: ...}"} + ], + text={"format": {"type": "json_object"}} + ) + + # Check if the conversation was too long for the context window, resulting in incomplete JSON + if response.status == "incomplete" and response.incomplete_details.reason == "max_output_tokens": + # your code should handle this error case + pass + + # Check if the OpenAI safety system refused the request and generated a refusal instead + if response.output[0].content[0].type == "refusal": + # your code should handle this error case + # In this case, the .content field will contain the explanation (if any) that the model generated for why it is refusing + print(response.output[0].content[0]["refusal"]) + + # Check if the model's output included restricted content, so the generation of JSON was halted and may be partial + if response.status == "incomplete" and response.incomplete_details.reason == "content_filter": + # your code should handle this error case + pass + + if response.status == "completed": + # In this case the model has either successfully finished generating the JSON object according to your schema, or the model generated one of the tokens you provided as a "stop token" + + if we_did_not_specify_stop_tokens: + # If you didn't specify any stop tokens, then the generation is complete and the content key will contain the serialized JSON object + # This will parse successfully and should now contain "{"winner": "Los Angeles Dodgers"}" + print(response.output_text) + else: + # Check if the response.output_text ends with one of your stop tokens and handle appropriately + pass +except Exception as e: + # Your code should handle errors here, for example a network error calling the API + print(e) +``` + +Resources +--------- + +To learn more about Structured Outputs, we recommend browsing the following resources: + +* Check out our [introductory 
cookbook](https://cookbook.openai.com/examples/structured_outputs_intro) on Structured Outputs +* Learn [how to build multi-agent systems](https://cookbook.openai.com/examples/structured_outputs_multi_agent) with Structured Outputs \ No newline at end of file diff --git a/Source/WulaFallenEmpire/WulaAI_DevDocs/openai/function-calling.md b/Source/WulaFallenEmpire/WulaAI_DevDocs/openai/function-calling.md new file mode 100644 index 00000000..da2c86af --- /dev/null +++ b/Source/WulaFallenEmpire/WulaAI_DevDocs/openai/function-calling.md @@ -0,0 +1,1052 @@ +Function calling +================ + +Give models access to new functionality and data they can use to follow instructions and respond to prompts. + +**Function calling** (also known as **tool calling**) provides a powerful and flexible way for OpenAI models to interface with external systems and access data outside their training data. This guide shows how you can connect a model to data and actions provided by your application. We'll show how to use function tools (defined by a JSON schema) and custom tools which work with free form text inputs and outputs. + +How it works +------------ + +Let's begin by understanding a few key terms about tool calling. After we have a shared vocabulary for tool calling, we'll show you how it's done with some practical examples. + +Tools - functionality we give the model + +A **function** or **tool** refers in the abstract to a piece of functionality that we tell the model it has access to. As a model generates a response to a prompt, it may decide that it needs data or functionality provided by a tool to follow the prompt's instructions. + +You could give the model access to tools that: + +* Get today's weather for a location +* Access account details for a given user ID +* Issue refunds for a lost order + +Or anything else you'd like the model to be able to know or do as it responds to a prompt. 
+ +When we make an API request to the model with a prompt, we can include a list of tools the model could consider using. For example, if we wanted the model to be able to answer questions about the current weather somewhere in the world, we might give it access to a `get_weather` tool that takes `location` as an argument. + +Tool calls - requests from the model to use tools + +A **function call** or **tool call** refers to a special kind of response we can get from the model if it examines a prompt, and then determines that in order to follow the instructions in the prompt, it needs to call one of the tools we made available to it. + +If the model receives a prompt like "what is the weather in Paris?" in an API request, it could respond to that prompt with a tool call for the `get_weather` tool, with `Paris` as the `location` argument. + +Tool call outputs - output we generate for the model + +A **function call output** or **tool call output** refers to the response a tool generates using the input from a model's tool call. The tool call output can either be structured JSON or plain text, and it should contain a reference to a specific model tool call (referenced by `call_id` in the examples to come). To complete our weather example: + +* The model has access to a `get_weather` **tool** that takes `location` as an argument. +* In response to a prompt like "what's the weather in Paris?" the model returns a **tool call** that contains a `location` argument with a value of `Paris` +* The **tool call output** might return a JSON object (e.g., `{"temperature": "25", "unit": "C"}`, indicating a current temperature of 25 degrees), [Image contents](/docs/guides/images), or [File contents](/docs/guides/pdf-files). + +We then send all of the tool definition, the original prompt, the model's tool call, and the tool call output back to the model to finally receive a text response like: + +``` +The weather in Paris today is 25C. 
+``` + +Functions versus tools + +* A function is a specific kind of tool, defined by a JSON schema. A function definition allows the model to pass data to your application, where your code can access data or take actions suggested by the model. +* In addition to function tools, there are custom tools (described in this guide) that work with free text inputs and outputs. +* There are also [built-in tools](/docs/guides/tools) that are part of the OpenAI platform. These tools enable the model to [search the web](/docs/guides/tools-web-search), [execute code](/docs/guides/tools-code-interpreter), access the functionality of an [MCP server](/docs/guides/tools-remote-mcp), and more. + +### The tool calling flow + +Tool calling is a multi-step conversation between your application and a model via the OpenAI API. The tool calling flow has five high level steps: + +1. Make a request to the model with tools it could call +2. Receive a tool call from the model +3. Execute code on the application side with input from the tool call +4. Make a second request to the model with the tool output +5. Receive a final response from the model (or more tool calls) + +![Function Calling Diagram Steps](https://cdn.openai.com/API/docs/images/function-calling-diagram-steps.png) + +Function tool example +--------------------- + +Let's look at an end-to-end tool calling flow for a `get_horoscope` function that gets a daily horoscope for an astrological sign. + +Complete tool calling example + +``` +from openai import OpenAI +import json + +client = OpenAI() + +# 1. 
Define a list of callable tools for the model
+tools = [
+    {
+        "type": "function",
+        "name": "get_horoscope",
+        "description": "Get today's horoscope for an astrological sign.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "sign": {
+                    "type": "string",
+                    "description": "An astrological sign like Taurus or Aquarius",
+                },
+            },
+            "required": ["sign"],
+        },
+    },
+]
+
+def get_horoscope(sign):
+    return f"{sign}: Next Tuesday you will befriend a baby otter."
+
+# Create a running input list we will add to over time
+input_list = [
+    {"role": "user", "content": "What is my horoscope? I am an Aquarius."}
+]
+
+# 2. Prompt the model with tools defined
+response = client.responses.create(
+    model="gpt-5",
+    tools=tools,
+    input=input_list,
+)
+
+# Save function call outputs for subsequent requests
+input_list += response.output
+
+for item in response.output:
+    if item.type == "function_call":
+        if item.name == "get_horoscope":
+            # 3. Execute the function logic for get_horoscope
+            horoscope = get_horoscope(json.loads(item.arguments)["sign"])
+
+            # 4. Provide function call results to the model
+            input_list.append({
+                "type": "function_call_output",
+                "call_id": item.call_id,
+                "output": json.dumps({
+                    "horoscope": horoscope
+                })
+            })
+
+print("Final input:")
+print(input_list)
+
+response = client.responses.create(
+    model="gpt-5",
+    instructions="Respond only with a horoscope generated by a tool.",
+    tools=tools,
+    input=input_list,
+)
+
+# 5. The model should be able to give a response!
+print("Final output:")
+print(response.model_dump_json(indent=2))
+print("\n" + response.output_text)
+```
+
+```
+import OpenAI from "openai";
+const openai = new OpenAI();
+
+// 1.
Define a list of callable tools for the model
+const tools = [
+  {
+    type: "function",
+    name: "get_horoscope",
+    description: "Get today's horoscope for an astrological sign.",
+    parameters: {
+      type: "object",
+      properties: {
+        sign: {
+          type: "string",
+          description: "An astrological sign like Taurus or Aquarius",
+        },
+      },
+      required: ["sign"],
+    },
+  },
+];
+
+function getHoroscope(sign) {
+  return sign + " Next Tuesday you will befriend a baby otter.";
+}
+
+// Create a running input list we will add to over time
+let input = [
+  { role: "user", content: "What is my horoscope? I am an Aquarius." },
+];
+
+// 2. Prompt the model with tools defined
+let response = await openai.responses.create({
+  model: "gpt-5",
+  tools,
+  input,
+});
+
+// Save function call outputs for subsequent requests
+input = input.concat(response.output);
+
+response.output.forEach((item) => {
+  if (item.type === "function_call" && item.name === "get_horoscope") {
+    // 3. Execute the function logic for get_horoscope
+    const horoscope = getHoroscope(JSON.parse(item.arguments).sign);
+
+    // 4. Provide function call results to the model
+    input.push({
+      type: "function_call_output",
+      call_id: item.call_id,
+      output: JSON.stringify({
+        horoscope
+      })
+    });
+  }
+});
+
+console.log("Final input:");
+console.log(JSON.stringify(input, null, 2));
+
+response = await openai.responses.create({
+  model: "gpt-5",
+  instructions: "Respond only with a horoscope generated by a tool.",
+  tools,
+  input,
+});
+
+// 5. The model should be able to give a response!
+console.log("Final output:");
+console.log(JSON.stringify(response.output, null, 2));
+```
+
+Note that for reasoning models like GPT-5 or o4-mini, any reasoning items returned in model responses with tool calls must also be passed back with tool call outputs.
+
+Defining functions
+------------------
+
+Functions can be set in the `tools` parameter of each API request. A function is defined by its schema, which informs the model what it does and what input arguments it expects.
A function definition has the following properties: + +|Field|Description| +|---|---| +|type|This should always be function| +|name|The function's name (e.g. get_weather)| +|description|Details on when and how to use the function| +|parameters|JSON schema defining the function's input arguments| +|strict|Whether to enforce strict mode for the function call| + +Here is an example function definition for a `get_weather` function + +``` +{ + "type": "function", + "name": "get_weather", + "description": "Retrieves current weather for the given location.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City and country e.g. Bogotá, Colombia" + }, + "units": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "Units the temperature will be returned in." + } + }, + "required": ["location", "units"], + "additionalProperties": false + }, + "strict": true +} +``` + +Because the `parameters` are defined by a [JSON schema](https://json-schema.org/), you can leverage many of its rich features like property types, enums, descriptions, nested objects, and, recursive objects. + +### Best practices for defining functions + +1. **Write clear and detailed function names, parameter descriptions, and instructions.** + + * **Explicitly describe the purpose of the function and each parameter** (and its format), and what the output represents. + * **Use the system prompt to describe when (and when not) to use each function.** Generally, tell the model _exactly_ what to do. + * **Include examples and edge cases**, especially to rectify any recurring failures. (**Note:** Adding examples may hurt performance for [reasoning models](/docs/guides/reasoning).) +2. **Apply software engineering best practices.** + + * **Make the functions obvious and intuitive**. 
([principle of least surprise](https://en.wikipedia.org/wiki/Principle_of_least_astonishment)) + * **Use enums** and object structure to make invalid states unrepresentable. (e.g. `toggle_light(on: bool, off: bool)` allows for invalid calls) + * **Pass the intern test.** Can an intern/human correctly use the function given nothing but what you gave the model? (If not, what questions do they ask you? Add the answers to the prompt.) +3. **Offload the burden from the model and use code where possible.** + + * **Don't make the model fill arguments you already know.** For example, if you already have an `order_id` based on a previous menu, don't have an `order_id` param – instead, have no params `submit_refund()` and pass the `order_id` with code. + * **Combine functions that are always called in sequence.** For example, if you always call `mark_location()` after `query_location()`, just move the marking logic into the query function call. +4. **Keep the number of functions small for higher accuracy.** + + * **Evaluate your performance** with different numbers of functions. + * **Aim for fewer than 20 functions** at any one time, though this is just a soft suggestion. +5. **Leverage OpenAI resources.** + + * **Generate and iterate on function schemas** in the [Playground](/playground). + * **Consider [fine-tuning](https://platform.openai.com/docs/guides/fine-tuning) to increase function calling accuracy** for large numbers of functions or difficult tasks. ([cookbook](https://cookbook.openai.com/examples/fine_tuning_for_function_calling)) + +### Token Usage + +Under the hood, functions are injected into the system message in a syntax the model has been trained on. This means functions count against the model's context limit and are billed as input tokens. If you run into token limits, we suggest limiting the number of functions or the length of the descriptions you provide for function parameters. 
+ +It is also possible to use [fine-tuning](/docs/guides/fine-tuning#fine-tuning-examples) to reduce the number of tokens used if you have many functions defined in your tools specification. + +Handling function calls +----------------------- + +When the model calls a function, you must execute it and return the result. Since model responses can include zero, one, or multiple calls, it is best practice to assume there are several. + +The response `output` array contains an entry with the `type` having a value of `function_call`. Each entry with a `call_id` (used later to submit the function result), `name`, and JSON-encoded `arguments`. + +Sample response with multiple function calls + +``` +[ + { + "id": "fc_12345xyz", + "call_id": "call_12345xyz", + "type": "function_call", + "name": "get_weather", + "arguments": "{\"location\":\"Paris, France\"}" + }, + { + "id": "fc_67890abc", + "call_id": "call_67890abc", + "type": "function_call", + "name": "get_weather", + "arguments": "{\"location\":\"Bogotá, Colombia\"}" + }, + { + "id": "fc_99999def", + "call_id": "call_99999def", + "type": "function_call", + "name": "send_email", + "arguments": "{\"to\":\"bob@email.com\",\"body\":\"Hi bob\"}" + } +] +``` + +Execute function calls and append results + +``` +for tool_call in response.output: + if tool_call.type != "function_call": + continue + + name = tool_call.name + args = json.loads(tool_call.arguments) + + result = call_function(name, args) + input_messages.append({ + "type": "function_call_output", + "call_id": tool_call.call_id, + "output": str(result) + }) +``` + +``` +for (const toolCall of response.output) { + if (toolCall.type !== "function_call") { + continue; + } + + const name = toolCall.name; + const args = JSON.parse(toolCall.arguments); + + const result = callFunction(name, args); + input.push({ + type: "function_call_output", + call_id: toolCall.call_id, + output: result.toString() + }); +} +``` + +In the example above, we have a hypothetical 
`call_function` to route each call. Here’s a possible implementation: + +Execute function calls and append results + +``` +def call_function(name, args): + if name == "get_weather": + return get_weather(**args) + if name == "send_email": + return send_email(**args) +``` + +``` +const callFunction = async (name, args) => { + if (name === "get_weather") { + return getWeather(args.latitude, args.longitude); + } + if (name === "send_email") { + return sendEmail(args.to, args.body); + } +}; +``` + +### Formatting results + +A result must be a string, but the format is up to you (JSON, error codes, plain text, etc.). The model will interpret that string as needed. + +If your function has no return value (e.g. `send_email`), simply return a string to indicate success or failure. (e.g. `"success"`) + +### Incorporating results into response + +After appending the results to your `input`, you can send them back to the model to get a final response. + +Send results back to model + +``` +response = client.responses.create( + model="gpt-4.1", + input=input_messages, + tools=tools, +) +``` + +``` +const response = await openai.responses.create({ + model: "gpt-4.1", + input, + tools, +}); +``` + +Final response + +``` +"It's about 15°C in Paris, 18°C in Bogotá, and I've sent that email to Bob." +``` + +Additional configurations +------------------------- + +### Tool choice + +By default the model will determine when and how many tools to use. You can force specific behavior with the `tool_choice` parameter. + +1. **Auto:** (_Default_) Call zero, one, or multiple functions. `tool_choice: "auto"` +2. **Required:** Call one or more functions. `tool_choice: "required"` +3. **Forced Function:** Call exactly one specific function. `tool_choice: {"type": "function", "name": "get_weather"}` +4. **Allowed tools:** Restrict the tool calls the model can make to a subset of the tools available to the model. 
+
+**When to use allowed\_tools**
+
+You might want to configure an `allowed_tools` list in case you want to make only a subset of tools available across model requests, but not modify the list of tools you pass in, so you can maximize savings from [prompt caching](/docs/guides/prompt-caching).
+
+```
+{
+  "tool_choice": {
+    "type": "allowed_tools",
+    "mode": "auto",
+    "tools": [
+      { "type": "function", "name": "get_weather" },
+      { "type": "function", "name": "search_docs" }
+    ]
+  }
+}
+```
+
+You can also set `tool_choice` to `"none"` to imitate the behavior of passing no functions.
+
+### Parallel function calling
+
+Parallel function calling is not possible when using [built-in tools](/docs/guides/tools).
+
+The model may choose to call multiple functions in a single turn. You can prevent this by setting `parallel_tool_calls` to `false`, which ensures exactly zero or one tool is called.
+
+**Note:** Currently, if you are using a fine tuned model and the model calls multiple functions in one turn then [strict mode](/docs/guides/function-calling#strict-mode) will be disabled for those calls.
+
+**Note for `gpt-4.1-nano-2025-04-14`:** This snapshot of `gpt-4.1-nano` can sometimes include multiple tool calls for the same tool if parallel tool calls are enabled. It is recommended to disable this feature when using this nano snapshot.
+
+### Strict mode
+
+Setting `strict` to `true` will ensure function calls reliably adhere to the function schema, instead of being best effort. We recommend always enabling strict mode.
+
+Under the hood, strict mode works by leveraging our [structured outputs](/docs/guides/structured-outputs) feature and therefore introduces a couple of requirements:
+
+1. `additionalProperties` must be set to `false` for each object in the `parameters`.
+2. All fields in `properties` must be marked as `required`.
+
+You can denote optional fields by adding `null` as a `type` option (see example below).
+ +Strict mode enabled + +``` +{ + "type": "function", + "name": "get_weather", + "description": "Retrieves current weather for the given location.", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City and country e.g. Bogotá, Colombia" + }, + "units": { + "type": ["string", "null"], + "enum": ["celsius", "fahrenheit"], + "description": "Units the temperature will be returned in." + } + }, + "required": ["location", "units"], + "additionalProperties": false + } +} +``` + +Strict mode disabled + +``` +{ + "type": "function", + "name": "get_weather", + "description": "Retrieves current weather for the given location.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City and country e.g. Bogotá, Colombia" + }, + "units": { + "type": "string", + "enum": ["celsius", "fahrenheit"], + "description": "Units the temperature will be returned in." + } + }, + "required": ["location"], + } +} +``` + +All schemas generated in the [playground](/playground) have strict mode enabled. + +While we recommend you enable strict mode, it has a few limitations: + +1. Some features of JSON schema are not supported. (See [supported schemas](/docs/guides/structured-outputs?context=with_parse#supported-schemas).) + +Specifically for fine tuned models: + +1. Schemas undergo additional processing on the first request (and are then cached). If your schemas vary from request to request, this may result in higher latencies. +2. Schemas are cached for performance, and are not eligible for [zero data retention](/docs/models#how-we-use-your-data). + +Streaming +--------- + +Streaming can be used to surface progress by showing which function is called as the model fills its arguments, and even displaying the arguments in real time. 
+ +Streaming function calls is very similar to streaming regular responses: you set `stream` to `true` and get different `event` objects. + +Streaming function calls + +``` +from openai import OpenAI + +client = OpenAI() + +tools = [{ + "type": "function", + "name": "get_weather", + "description": "Get current temperature for a given location.", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "City and country e.g. Bogotá, Colombia" + } + }, + "required": [ + "location" + ], + "additionalProperties": False + } +}] + +stream = client.responses.create( + model="gpt-4.1", + input=[{"role": "user", "content": "What's the weather like in Paris today?"}], + tools=tools, + stream=True +) + +for event in stream: + print(event) +``` + +``` +import { OpenAI } from "openai"; + +const openai = new OpenAI(); + +const tools = [{ + type: "function", + name: "get_weather", + description: "Get current temperature for provided coordinates in celsius.", + parameters: { + type: "object", + properties: { + latitude: { type: "number" }, + longitude: { type: "number" } + }, + required: ["latitude", "longitude"], + additionalProperties: false + }, + strict: true +}]; + +const stream = await openai.responses.create({ + model: "gpt-4.1", + input: [{ role: "user", content: "What's the weather like in Paris today?" 
}], + tools, + stream: true, + store: true, +}); + +for await (const event of stream) { + console.log(event) +} +``` + +Output events + +``` +{"type":"response.output_item.added","response_id":"resp_1234xyz","output_index":0,"item":{"type":"function_call","id":"fc_1234xyz","call_id":"call_1234xyz","name":"get_weather","arguments":""}} +{"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"{\""} +{"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"location"} +{"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"\":\""} +{"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"Paris"} +{"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":","} +{"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":" France"} +{"type":"response.function_call_arguments.delta","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"delta":"\"}"} +{"type":"response.function_call_arguments.done","response_id":"resp_1234xyz","item_id":"fc_1234xyz","output_index":0,"arguments":"{\"location\":\"Paris, France\"}"} +{"type":"response.output_item.done","response_id":"resp_1234xyz","output_index":0,"item":{"type":"function_call","id":"fc_1234xyz","call_id":"call_1234xyz","name":"get_weather","arguments":"{\"location\":\"Paris, France\"}"}} +``` + +Instead of aggregating chunks into a single `content` string, however, you're aggregating chunks into an encoded `arguments` JSON object. 
+ +When the model calls one or more functions an event of type `response.output_item.added` will be emitted for each function call that contains the following fields: + +|Field|Description| +|---|---| +|response_id|The id of the response that the function call belongs to| +|output_index|The index of the output item in the response. This represents the individual function calls in the response.| +|item|The in-progress function call item that includes a name, arguments and id field| + +Afterwards you will receive a series of events of type `response.function_call_arguments.delta` which will contain the `delta` of the `arguments` field. These events contain the following fields: + +|Field|Description| +|---|---| +|response_id|The id of the response that the function call belongs to| +|item_id|The id of the function call item that the delta belongs to| +|output_index|The index of the output item in the response. This represents the individual function calls in the response.| +|delta|The delta of the arguments field.| + +Below is a code snippet demonstrating how to aggregate the `delta`s into a final `tool_call` object. 
+
+Accumulating tool\_call deltas
+
+```
+final_tool_calls = {}
+
+for event in stream:
+    if event.type == 'response.output_item.added':
+        final_tool_calls[event.output_index] = event.item
+    elif event.type == 'response.function_call_arguments.delta':
+        index = event.output_index
+
+        if index in final_tool_calls:
+            final_tool_calls[index].arguments += event.delta
+```
+
+```
+const finalToolCalls = {};
+
+for await (const event of stream) {
+  if (event.type === 'response.output_item.added') {
+    finalToolCalls[event.output_index] = event.item;
+  } else if (event.type === 'response.function_call_arguments.delta') {
+    const index = event.output_index;
+
+    if (finalToolCalls[index]) {
+      finalToolCalls[index].arguments += event.delta;
+    }
+  }
+}
+```
+
+Accumulated final\_tool\_calls\[0\]
+
+```
+{
+    "type": "function_call",
+    "id": "fc_1234xyz",
+    "call_id": "call_2345abc",
+    "name": "get_weather",
+    "arguments": "{\"location\":\"Paris, France\"}"
+}
+```
+
+When the model has finished calling the functions an event of type `response.function_call_arguments.done` will be emitted. This event contains the entire function call including the following fields:
+
+|Field|Description|
+|---|---|
+|response_id|The id of the response that the function call belongs to|
+|output_index|The index of the output item in the response. This represents the individual function calls in the response.|
+|item|The function call item that includes a name, arguments and id field.|
+
+Custom tools
+------------
+
+Custom tools work in much the same way as JSON schema-driven function tools. But rather than providing the model explicit instructions on what input your tool requires, the model can pass an arbitrary string back to your tool as input. This is useful to avoid unnecessarily wrapping a response in JSON, or to apply a custom grammar to the response (more on this below).
+ +The following code sample shows creating a custom tool that expects to receive a string of text containing Python code as a response. + +Custom tool calling example + +``` +from openai import OpenAI + +client = OpenAI() + +response = client.responses.create( + model="gpt-5", + input="Use the code_exec tool to print hello world to the console.", + tools=[ + { + "type": "custom", + "name": "code_exec", + "description": "Executes arbitrary Python code.", + } + ] +) +print(response.output) +``` + +``` +import OpenAI from "openai"; +const client = new OpenAI(); + +const response = await client.responses.create({ + model: "gpt-5", + input: "Use the code_exec tool to print hello world to the console.", + tools: [ + { + type: "custom", + name: "code_exec", + description: "Executes arbitrary Python code.", + }, + ], +}); + +console.log(response.output); +``` + +Just as before, the `output` array will contain a tool call generated by the model. Except this time, the tool call input is given as plain text. + +``` +[ + { + "id": "rs_6890e972fa7c819ca8bc561526b989170694874912ae0ea6", + "type": "reasoning", + "content": [], + "summary": [] + }, + { + "id": "ctc_6890e975e86c819c9338825b3e1994810694874912ae0ea6", + "type": "custom_tool_call", + "status": "completed", + "call_id": "call_aGiFQkRWSWAIsMQ19fKqxUgb", + "input": "print(\"hello world\")", + "name": "code_exec" + } +] +``` + +Context-free grammars +--------------------- + +A [context-free grammar](https://en.wikipedia.org/wiki/Context-free_grammar) (CFG) is a set of rules that define how to produce valid text in a given format. For custom tools, you can provide a CFG that will constrain the model's text input for a custom tool. + +You can provide a custom CFG using the `grammar` parameter when configuring a custom tool. Currently, we support two CFG syntaxes when defining grammars: `lark` and `regex`. 
+ +Lark CFG +-------- + +Lark context free grammar example + +``` +from openai import OpenAI + +client = OpenAI() + +grammar = """ +start: expr +expr: term (SP ADD SP term)* -> add +| term +term: factor (SP MUL SP factor)* -> mul +| factor +factor: INT +SP: " " +ADD: "+" +MUL: "*" +%import common.INT +""" + +response = client.responses.create( + model="gpt-5", + input="Use the math_exp tool to add four plus four.", + tools=[ + { + "type": "custom", + "name": "math_exp", + "description": "Creates valid mathematical expressions", + "format": { + "type": "grammar", + "syntax": "lark", + "definition": grammar, + }, + } + ] +) +print(response.output) +``` + +``` +import OpenAI from "openai"; +const client = new OpenAI(); + +const grammar = ` +start: expr +expr: term (SP ADD SP term)* -> add +| term +term: factor (SP MUL SP factor)* -> mul +| factor +factor: INT +SP: " " +ADD: "+" +MUL: "*" +%import common.INT +`; + +const response = await client.responses.create({ + model: "gpt-5", + input: "Use the math_exp tool to add four plus four.", + tools: [ + { + type: "custom", + name: "math_exp", + description: "Creates valid mathematical expressions", + format: { + type: "grammar", + syntax: "lark", + definition: grammar, + }, + }, + ], +}); + +console.log(response.output); +``` + +The output from the tool should then conform to the Lark CFG that you defined: + +``` +[ + { + "id": "rs_6890ed2b6374819dbbff5353e6664ef103f4db9848be4829", + "type": "reasoning", + "content": [], + "summary": [] + }, + { + "id": "ctc_6890ed2f32e8819daa62bef772b8c15503f4db9848be4829", + "type": "custom_tool_call", + "status": "completed", + "call_id": "call_pmlLjmvG33KJdyVdC4MVdk5N", + "input": "4 + 4", + "name": "math_exp" + } +] +``` + +Grammars are specified using a variation of [Lark](https://lark-parser.readthedocs.io/en/stable/index.html). Model sampling is constrained using [LLGuidance](https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md). 
Some features of Lark are not supported: + +* Lookarounds in lexer regexes +* Lazy modifiers (`*?`, `+?`, `??`) in lexer regexes +* Priorities of terminals +* Templates +* Imports (other than built-in `%import` common) +* `%declare`s + +We recommend using the [Lark IDE](https://www.lark-parser.org/ide/) to experiment with custom grammars. + +### Keep grammars simple + +Try to make your grammar as simple as possible. The OpenAI API may return an error if the grammar is too complex, so you should ensure that your desired grammar is compatible before using it in the API. + +Lark grammars can be tricky to perfect. While simple grammars perform most reliably, complex grammars often require iteration on the grammar definition itself, the prompt, and the tool description to ensure that the model does not go out of distribution. + +### Correct versus incorrect patterns + +Correct (single, bounded terminal): + +``` +start: SENTENCE +SENTENCE: /[A-Za-z, ]*(the hero|a dragon|an old man|the princess)[A-Za-z, ]*(fought|saved|found|lost)[A-Za-z, ]*(a treasure|the kingdom|a secret|his way)[A-Za-z, ]*\./ +``` + +Do NOT do this (splitting across rules/terminals). This attempts to let rules partition free text between terminals. The lexer will greedily match the free-text pieces and you'll lose control: + +``` +start: sentence +sentence: /[A-Za-z, ]+/ subject /[A-Za-z, ]+/ verb /[A-Za-z, ]+/ object /[A-Za-z, ]+/ +``` + +Lowercase rules don't influence how terminals are cut from the input—only terminal definitions do. When you need “free text between anchors,” make it one giant regex terminal so the lexer matches it exactly once with the structure you intend. + +### Terminals versus rules + +Lark uses terminals for lexer tokens (by convention, `UPPERCASE`) and rules for parser productions (by convention, `lowercase`). 
The most practical way to stay within the supported subset and avoid surprises is to keep your grammar simple and explicit, and to use terminals and rules with a clear separation of concerns.
+
+The regex syntax used by terminals is the [Rust regex crate syntax](https://docs.rs/regex/latest/regex/#syntax), not Python's `re` [module](https://docs.python.org/3/library/re.html).
+
+### Key ideas and best practices
+
+**Lexer runs before the parser**
+
+Terminals are matched by the lexer (greedily / longest match wins) before any CFG rule logic is applied. If you try to "shape" a terminal by splitting it across several rules, the lexer cannot be guided by those rules—only by terminal regexes.
+
+**Prefer one terminal when you're carving text out of freeform spans**
+
+If you need to recognize a pattern embedded in arbitrary text (e.g., natural language with “anything” between anchors), express that as a single terminal. Do not try to interleave free‑text terminals with parser rules; the greedy lexer will not respect your intended boundaries and it is highly likely the model will go out of distribution.
+
+**Use rules to compose discrete tokens**
+
+Rules are ideal when you're combining clearly delimited terminals (numbers, keywords, punctuation) into larger structures. They're not the right tool for constraining "the stuff in between" two terminals.
+
+**Keep terminals simple, bounded, and self-contained**
+
+Favor explicit character classes and bounded quantifiers (`{0,10}`, not unbounded `*` everywhere). If you need "any text up to a period", prefer something like `/[^.\n]{0,10}\./` rather than `/.+\./` to avoid runaway growth.
+
+**Use rules to combine tokens, not to steer regex internals**
+
+Good rule usage example:
+
+```
+start: expr
+NUMBER: /[0-9]+/
+PLUS: "+"
+MINUS: "-"
+expr: term (("+"|"-") term)*
+term: NUMBER
+```
+
+**Treat whitespace explicitly**
+
+Don't rely on open-ended `%ignore` directives. Using unbounded ignore directives may cause the grammar to be too complex and/or may cause the model to go out of distribution. Prefer threading explicit terminals wherever whitespace is allowed.
+
+### Troubleshooting
+
+* If the API rejects the grammar because it is too complex, simplify the rules and terminals and remove unbounded `%ignore`s.
+* If custom tools are called with unexpected tokens, confirm terminals aren’t overlapping; check greedy lexer.
+* When the model drifts "out‑of‑distribution" (shows up as the model producing excessively long or repetitive outputs, it is syntactically valid but is semantically wrong):
+    * Tighten the grammar.
+    * Iterate on the prompt (add few-shot examples) and tool description (explain the grammar and instruct the model to reason and conform to it).
+    * Experiment with a higher reasoning effort (e.g., bump from medium to high).
+
+Regex CFG
+---------
+
+Regex context free grammar example
+
+```
+from openai import OpenAI
+
+client = OpenAI()
+
+grammar = r"^(?P<month>January|February|March|April|May|June|July|August|September|October|November|December)\s+(?P<day>\d{1,2})(?:st|nd|rd|th)?\s+(?P<year>\d{4})\s+at\s+(?P<hour>0?[1-9]|1[0-2])(?P<ampm>AM|PM)$"
+
+response = client.responses.create(
+    model="gpt-5",
+    input="Use the timestamp tool to save a timestamp for August 7th 2025 at 10AM.",
+    tools=[
+        {
+            "type": "custom",
+            "name": "timestamp",
+            "description": "Saves a timestamp in date + time in 24-hr format.",
+            "format": {
+                "type": "grammar",
+                "syntax": "regex",
+                "definition": grammar,
+            },
+        }
+    ]
+)
+print(response.output)
+```
+
+```
+import OpenAI from "openai";
+const client = new OpenAI();
+
+const grammar = "^(?P<month>January|February|March|April|May|June|July|August|September|October|November|December)\\s+(?P<day>\\d{1,2})(?:st|nd|rd|th)?\\s+(?P<year>\\d{4})\\s+at\\s+(?P<hour>0?[1-9]|1[0-2])(?P<ampm>AM|PM)$";
+
+const response = await client.responses.create({
+  model: "gpt-5",
+  input: "Use the timestamp tool to save a timestamp for August 7th 2025 at 10AM.",
+  tools: [
+ { + type: "custom", + name: "timestamp", + description: "Saves a timestamp in date + time in 24-hr format.", + format: { + type: "grammar", + syntax: "regex", + definition: grammar, + }, + }, + ], +}); + +console.log(response.output); +``` + +The output from the tool should then conform to the Regex CFG that you defined: + +``` +[ + { + "id": "rs_6894f7a3dd4c81a1823a723a00bfa8710d7962f622d1c260", + "type": "reasoning", + "content": [], + "summary": [] + }, + { + "id": "ctc_6894f7ad7fb881a1bffa1f377393b1a40d7962f622d1c260", + "type": "custom_tool_call", + "status": "completed", + "call_id": "call_8m4XCnYvEmFlzHgDHbaOCFlK", + "input": "August 7th 2025 at 10AM", + "name": "timestamp" + } +] +``` + +As with the Lark syntax, regexes use the [Rust regex crate syntax](https://docs.rs/regex/latest/regex/#syntax), not Python's `re` [module](https://docs.python.org/3/library/re.html). + +Some features of Regex are not supported: + +* Lookarounds +* Lazy modifiers (`*?`, `+?`, `??`) + +### Key ideas and best practices + +**Pattern must be on one line** + +If you need to match a newline in the input, use the escaped sequence `\n`. Do not use verbose/extended mode, which allows patterns to span multiple lines. + +**Provide the regex as a plain pattern string** + +Don't enclose the pattern in `//`. \ No newline at end of file diff --git a/llama.cpp b/llama.cpp new file mode 160000 index 00000000..d77d7c5c --- /dev/null +++ b/llama.cpp @@ -0,0 +1 @@ +Subproject commit d77d7c5c0654dc52b51f03941b12ae85d7227608