2615f489d6
The `ConvertOpenAIResponsesRequestToGemini` function had code that attempted to uppercase JSON Schema type values (e.g. "string" -> "STRING") for Gemini compatibility. This broke nullable types because when `type` is a JSON array like `["string", "null"]`: 1. `gjson.Result.String()` returns the raw JSON text `["string","null"]` 2. `strings.ToUpper()` produces `["STRING","NULL"]` 3. `sjson.Set()` stores it as a JSON **string** `"[\"STRING\",\"NULL\"]"` instead of a JSON array 4. The downstream `CleanJSONSchemaForGemini()` / `flattenTypeArrays()` cannot detect it (since `IsArray()` returns false on a string) 5. Gemini/Antigravity API rejects it with: `400 Invalid value at '...type' (Type), "["STRING","NULL"]"` This was confirmed and tested with Droid Factory (Antigravity) Gemini models where Claude Code sends tool schemas with nullable parameters. The fix removes the uppercasing logic entirely and passes the raw schema through to `parametersJsonSchema`. This is safe because: - Antigravity executor already runs `CleanJSONSchemaForGemini()` which properly handles type arrays, nullable fields, and all schema cleanup - Gemini/Vertex executors use `parametersJsonSchema` which accepts raw JSON Schema directly (no uppercasing needed) - The uppercasing code also only iterated top-level properties, missing nested schemas entirely Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
404 lines
14 KiB
Go
404 lines
14 KiB
Go
package responses
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
|
|
"github.com/tidwall/gjson"
|
|
"github.com/tidwall/sjson"
|
|
)
|
|
|
|
// geminiResponsesThoughtSignature is the value written to the "thoughtSignature"
// field of every functionCall part that ConvertOpenAIResponsesRequestToGemini
// builds from a Responses "function_call" input item.
// NOTE(review): the literal reads like a sentinel that asks the upstream to
// skip thought-signature validation — confirm against the upstream API.
const geminiResponsesThoughtSignature = "skip_thought_signature_validator"
|
|
|
|
func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte, stream bool) []byte {
|
|
rawJSON := inputRawJSON
|
|
|
|
// Note: modelName and stream parameters are part of the fixed method signature
|
|
_ = modelName // Unused but required by interface
|
|
_ = stream // Unused but required by interface
|
|
|
|
// Base Gemini API template (do not include thinkingConfig by default)
|
|
out := `{"contents":[]}`
|
|
|
|
root := gjson.ParseBytes(rawJSON)
|
|
|
|
// Extract system instruction from OpenAI "instructions" field
|
|
if instructions := root.Get("instructions"); instructions.Exists() {
|
|
systemInstr := `{"parts":[{"text":""}]}`
|
|
systemInstr, _ = sjson.Set(systemInstr, "parts.0.text", instructions.String())
|
|
out, _ = sjson.SetRaw(out, "system_instruction", systemInstr)
|
|
}
|
|
|
|
// Convert input messages to Gemini contents format
|
|
if input := root.Get("input"); input.Exists() && input.IsArray() {
|
|
items := input.Array()
|
|
|
|
// Normalize consecutive function calls and outputs so each call is immediately followed by its response
|
|
normalized := make([]gjson.Result, 0, len(items))
|
|
for i := 0; i < len(items); {
|
|
item := items[i]
|
|
itemType := item.Get("type").String()
|
|
itemRole := item.Get("role").String()
|
|
if itemType == "" && itemRole != "" {
|
|
itemType = "message"
|
|
}
|
|
|
|
if itemType == "function_call" {
|
|
var calls []gjson.Result
|
|
var outputs []gjson.Result
|
|
|
|
for i < len(items) {
|
|
next := items[i]
|
|
nextType := next.Get("type").String()
|
|
nextRole := next.Get("role").String()
|
|
if nextType == "" && nextRole != "" {
|
|
nextType = "message"
|
|
}
|
|
if nextType != "function_call" {
|
|
break
|
|
}
|
|
calls = append(calls, next)
|
|
i++
|
|
}
|
|
|
|
for i < len(items) {
|
|
next := items[i]
|
|
nextType := next.Get("type").String()
|
|
nextRole := next.Get("role").String()
|
|
if nextType == "" && nextRole != "" {
|
|
nextType = "message"
|
|
}
|
|
if nextType != "function_call_output" {
|
|
break
|
|
}
|
|
outputs = append(outputs, next)
|
|
i++
|
|
}
|
|
|
|
if len(calls) > 0 {
|
|
outputMap := make(map[string]gjson.Result, len(outputs))
|
|
for _, out := range outputs {
|
|
outputMap[out.Get("call_id").String()] = out
|
|
}
|
|
for _, call := range calls {
|
|
normalized = append(normalized, call)
|
|
callID := call.Get("call_id").String()
|
|
if resp, ok := outputMap[callID]; ok {
|
|
normalized = append(normalized, resp)
|
|
delete(outputMap, callID)
|
|
}
|
|
}
|
|
for _, out := range outputs {
|
|
if _, ok := outputMap[out.Get("call_id").String()]; ok {
|
|
normalized = append(normalized, out)
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
|
|
if itemType == "function_call_output" {
|
|
normalized = append(normalized, item)
|
|
i++
|
|
continue
|
|
}
|
|
|
|
normalized = append(normalized, item)
|
|
i++
|
|
}
|
|
|
|
for _, item := range normalized {
|
|
itemType := item.Get("type").String()
|
|
itemRole := item.Get("role").String()
|
|
if itemType == "" && itemRole != "" {
|
|
itemType = "message"
|
|
}
|
|
|
|
switch itemType {
|
|
case "message":
|
|
if strings.EqualFold(itemRole, "system") {
|
|
if contentArray := item.Get("content"); contentArray.Exists() && contentArray.IsArray() {
|
|
var builder strings.Builder
|
|
contentArray.ForEach(func(_, contentItem gjson.Result) bool {
|
|
text := contentItem.Get("text").String()
|
|
if builder.Len() > 0 && text != "" {
|
|
builder.WriteByte('\n')
|
|
}
|
|
builder.WriteString(text)
|
|
return true
|
|
})
|
|
if !gjson.Get(out, "system_instruction").Exists() {
|
|
systemInstr := `{"parts":[{"text":""}]}`
|
|
systemInstr, _ = sjson.Set(systemInstr, "parts.0.text", builder.String())
|
|
out, _ = sjson.SetRaw(out, "system_instruction", systemInstr)
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Handle regular messages
|
|
// Note: In Responses format, model outputs may appear as content items with type "output_text"
|
|
// even when the message.role is "user". We split such items into distinct Gemini messages
|
|
// with roles derived from the content type to match docs/convert-2.md.
|
|
if contentArray := item.Get("content"); contentArray.Exists() && contentArray.IsArray() {
|
|
currentRole := ""
|
|
var currentParts []string
|
|
|
|
flush := func() {
|
|
if currentRole == "" || len(currentParts) == 0 {
|
|
currentParts = nil
|
|
return
|
|
}
|
|
one := `{"role":"","parts":[]}`
|
|
one, _ = sjson.Set(one, "role", currentRole)
|
|
for _, part := range currentParts {
|
|
one, _ = sjson.SetRaw(one, "parts.-1", part)
|
|
}
|
|
out, _ = sjson.SetRaw(out, "contents.-1", one)
|
|
currentParts = nil
|
|
}
|
|
|
|
contentArray.ForEach(func(_, contentItem gjson.Result) bool {
|
|
contentType := contentItem.Get("type").String()
|
|
if contentType == "" {
|
|
contentType = "input_text"
|
|
}
|
|
|
|
effRole := "user"
|
|
if itemRole != "" {
|
|
switch strings.ToLower(itemRole) {
|
|
case "assistant", "model":
|
|
effRole = "model"
|
|
default:
|
|
effRole = strings.ToLower(itemRole)
|
|
}
|
|
}
|
|
if contentType == "output_text" {
|
|
effRole = "model"
|
|
}
|
|
if effRole == "assistant" {
|
|
effRole = "model"
|
|
}
|
|
|
|
if currentRole != "" && effRole != currentRole {
|
|
flush()
|
|
currentRole = ""
|
|
}
|
|
if currentRole == "" {
|
|
currentRole = effRole
|
|
}
|
|
|
|
var partJSON string
|
|
switch contentType {
|
|
case "input_text", "output_text":
|
|
if text := contentItem.Get("text"); text.Exists() {
|
|
partJSON = `{"text":""}`
|
|
partJSON, _ = sjson.Set(partJSON, "text", text.String())
|
|
}
|
|
case "input_image":
|
|
imageURL := contentItem.Get("image_url").String()
|
|
if imageURL == "" {
|
|
imageURL = contentItem.Get("url").String()
|
|
}
|
|
if imageURL != "" {
|
|
mimeType := "application/octet-stream"
|
|
data := ""
|
|
if strings.HasPrefix(imageURL, "data:") {
|
|
trimmed := strings.TrimPrefix(imageURL, "data:")
|
|
mediaAndData := strings.SplitN(trimmed, ";base64,", 2)
|
|
if len(mediaAndData) == 2 {
|
|
if mediaAndData[0] != "" {
|
|
mimeType = mediaAndData[0]
|
|
}
|
|
data = mediaAndData[1]
|
|
} else {
|
|
mediaAndData = strings.SplitN(trimmed, ",", 2)
|
|
if len(mediaAndData) == 2 {
|
|
if mediaAndData[0] != "" {
|
|
mimeType = mediaAndData[0]
|
|
}
|
|
data = mediaAndData[1]
|
|
}
|
|
}
|
|
}
|
|
if data != "" {
|
|
partJSON = `{"inline_data":{"mime_type":"","data":""}}`
|
|
partJSON, _ = sjson.Set(partJSON, "inline_data.mime_type", mimeType)
|
|
partJSON, _ = sjson.Set(partJSON, "inline_data.data", data)
|
|
}
|
|
}
|
|
}
|
|
|
|
if partJSON != "" {
|
|
currentParts = append(currentParts, partJSON)
|
|
}
|
|
return true
|
|
})
|
|
|
|
flush()
|
|
}
|
|
|
|
case "function_call":
|
|
// Handle function calls - convert to model message with functionCall
|
|
name := item.Get("name").String()
|
|
arguments := item.Get("arguments").String()
|
|
|
|
modelContent := `{"role":"model","parts":[]}`
|
|
functionCall := `{"functionCall":{"name":"","args":{}}}`
|
|
functionCall, _ = sjson.Set(functionCall, "functionCall.name", name)
|
|
functionCall, _ = sjson.Set(functionCall, "thoughtSignature", geminiResponsesThoughtSignature)
|
|
functionCall, _ = sjson.Set(functionCall, "functionCall.id", item.Get("call_id").String())
|
|
|
|
// Parse arguments JSON string and set as args object
|
|
if arguments != "" {
|
|
argsResult := gjson.Parse(arguments)
|
|
functionCall, _ = sjson.SetRaw(functionCall, "functionCall.args", argsResult.Raw)
|
|
}
|
|
|
|
modelContent, _ = sjson.SetRaw(modelContent, "parts.-1", functionCall)
|
|
out, _ = sjson.SetRaw(out, "contents.-1", modelContent)
|
|
|
|
case "function_call_output":
|
|
// Handle function call outputs - convert to function message with functionResponse
|
|
callID := item.Get("call_id").String()
|
|
// Use .Raw to preserve the JSON encoding (includes quotes for strings)
|
|
outputRaw := item.Get("output").Str
|
|
|
|
functionContent := `{"role":"function","parts":[]}`
|
|
functionResponse := `{"functionResponse":{"name":"","response":{}}}`
|
|
|
|
// We need to extract the function name from the previous function_call
|
|
// For now, we'll use a placeholder or extract from context if available
|
|
functionName := "unknown" // This should ideally be matched with the corresponding function_call
|
|
|
|
// Find the corresponding function call name by matching call_id
|
|
// We need to look back through the input array to find the matching call
|
|
if inputArray := root.Get("input"); inputArray.Exists() && inputArray.IsArray() {
|
|
inputArray.ForEach(func(_, prevItem gjson.Result) bool {
|
|
if prevItem.Get("type").String() == "function_call" && prevItem.Get("call_id").String() == callID {
|
|
functionName = prevItem.Get("name").String()
|
|
return false // Stop iteration
|
|
}
|
|
return true
|
|
})
|
|
}
|
|
|
|
functionResponse, _ = sjson.Set(functionResponse, "functionResponse.name", functionName)
|
|
functionResponse, _ = sjson.Set(functionResponse, "functionResponse.id", callID)
|
|
|
|
// Set the raw JSON output directly (preserves string encoding)
|
|
if outputRaw != "" && outputRaw != "null" {
|
|
output := gjson.Parse(outputRaw)
|
|
if output.Type == gjson.JSON {
|
|
functionResponse, _ = sjson.SetRaw(functionResponse, "functionResponse.response.result", output.Raw)
|
|
} else {
|
|
functionResponse, _ = sjson.Set(functionResponse, "functionResponse.response.result", outputRaw)
|
|
}
|
|
}
|
|
functionContent, _ = sjson.SetRaw(functionContent, "parts.-1", functionResponse)
|
|
out, _ = sjson.SetRaw(out, "contents.-1", functionContent)
|
|
|
|
case "reasoning":
|
|
thoughtContent := `{"role":"model","parts":[]}`
|
|
thought := `{"text":"","thoughtSignature":"","thought":true}`
|
|
thought, _ = sjson.Set(thought, "text", item.Get("summary.0.text").String())
|
|
thought, _ = sjson.Set(thought, "thoughtSignature", item.Get("encrypted_content").String())
|
|
|
|
thoughtContent, _ = sjson.SetRaw(thoughtContent, "parts.-1", thought)
|
|
out, _ = sjson.SetRaw(out, "contents.-1", thoughtContent)
|
|
}
|
|
}
|
|
} else if input.Exists() && input.Type == gjson.String {
|
|
// Simple string input conversion to user message
|
|
userContent := `{"role":"user","parts":[{"text":""}]}`
|
|
userContent, _ = sjson.Set(userContent, "parts.0.text", input.String())
|
|
out, _ = sjson.SetRaw(out, "contents.-1", userContent)
|
|
}
|
|
|
|
// Convert tools to Gemini functionDeclarations format
|
|
if tools := root.Get("tools"); tools.Exists() && tools.IsArray() {
|
|
geminiTools := `[{"functionDeclarations":[]}]`
|
|
|
|
tools.ForEach(func(_, tool gjson.Result) bool {
|
|
if tool.Get("type").String() == "function" {
|
|
funcDecl := `{"name":"","description":"","parametersJsonSchema":{}}`
|
|
|
|
if name := tool.Get("name"); name.Exists() {
|
|
funcDecl, _ = sjson.Set(funcDecl, "name", name.String())
|
|
}
|
|
if desc := tool.Get("description"); desc.Exists() {
|
|
funcDecl, _ = sjson.Set(funcDecl, "description", desc.String())
|
|
}
|
|
if params := tool.Get("parameters"); params.Exists() {
|
|
funcDecl, _ = sjson.SetRaw(funcDecl, "parametersJsonSchema", params.Raw)
|
|
}
|
|
|
|
geminiTools, _ = sjson.SetRaw(geminiTools, "0.functionDeclarations.-1", funcDecl)
|
|
}
|
|
return true
|
|
})
|
|
|
|
// Only add tools if there are function declarations
|
|
if funcDecls := gjson.Get(geminiTools, "0.functionDeclarations"); funcDecls.Exists() && len(funcDecls.Array()) > 0 {
|
|
out, _ = sjson.SetRaw(out, "tools", geminiTools)
|
|
}
|
|
}
|
|
|
|
// Handle generation config from OpenAI format
|
|
if maxOutputTokens := root.Get("max_output_tokens"); maxOutputTokens.Exists() {
|
|
genConfig := `{"maxOutputTokens":0}`
|
|
genConfig, _ = sjson.Set(genConfig, "maxOutputTokens", maxOutputTokens.Int())
|
|
out, _ = sjson.SetRaw(out, "generationConfig", genConfig)
|
|
}
|
|
|
|
// Handle temperature if present
|
|
if temperature := root.Get("temperature"); temperature.Exists() {
|
|
if !gjson.Get(out, "generationConfig").Exists() {
|
|
out, _ = sjson.SetRaw(out, "generationConfig", `{}`)
|
|
}
|
|
out, _ = sjson.Set(out, "generationConfig.temperature", temperature.Float())
|
|
}
|
|
|
|
// Handle top_p if present
|
|
if topP := root.Get("top_p"); topP.Exists() {
|
|
if !gjson.Get(out, "generationConfig").Exists() {
|
|
out, _ = sjson.SetRaw(out, "generationConfig", `{}`)
|
|
}
|
|
out, _ = sjson.Set(out, "generationConfig.topP", topP.Float())
|
|
}
|
|
|
|
// Handle stop sequences
|
|
if stopSequences := root.Get("stop_sequences"); stopSequences.Exists() && stopSequences.IsArray() {
|
|
if !gjson.Get(out, "generationConfig").Exists() {
|
|
out, _ = sjson.SetRaw(out, "generationConfig", `{}`)
|
|
}
|
|
var sequences []string
|
|
stopSequences.ForEach(func(_, seq gjson.Result) bool {
|
|
sequences = append(sequences, seq.String())
|
|
return true
|
|
})
|
|
out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
|
|
}
|
|
|
|
// Apply thinking configuration: convert OpenAI Responses API reasoning.effort to Gemini thinkingConfig.
|
|
// Inline translation-only mapping; capability checks happen later in ApplyThinking.
|
|
re := root.Get("reasoning.effort")
|
|
if re.Exists() {
|
|
effort := strings.ToLower(strings.TrimSpace(re.String()))
|
|
if effort != "" {
|
|
thinkingPath := "generationConfig.thinkingConfig"
|
|
if effort == "auto" {
|
|
out, _ = sjson.Set(out, thinkingPath+".thinkingBudget", -1)
|
|
out, _ = sjson.Set(out, thinkingPath+".includeThoughts", true)
|
|
} else {
|
|
out, _ = sjson.Set(out, thinkingPath+".thinkingLevel", effort)
|
|
out, _ = sjson.Set(out, thinkingPath+".includeThoughts", effort != "none")
|
|
}
|
|
}
|
|
}
|
|
|
|
result := []byte(out)
|
|
result = common.AttachDefaultSafetySettings(result, "safetySettings")
|
|
return result
|
|
}
|