Files
CLIProxyAPI/internal/translator/gemini-cli/claude/gemini-cli_claude_response.go
sususu98 2398ebad55 fix(translator): sanitize tool names for Gemini function_declarations compatibility
Claude Code and MCP clients may send tool names containing characters
invalid for Gemini's function_declarations (e.g. '/', '@', spaces).
Sanitize on request via SanitizeFunctionName and restore original names
on response for both antigravity/claude and gemini-cli/claude translators.
2026-03-22 13:10:53 +08:00

359 lines
17 KiB
Go

// Package claude provides response translation functionality for Claude Code API compatibility.
// This package handles the conversion of backend client responses into Claude Code-compatible
// Server-Sent Events (SSE) format, implementing a sophisticated state machine that manages
// different response types including text content, thinking processes, and function calls.
// The translation ensures proper sequencing of SSE events and maintains state across
// multiple response chunks to provide a seamless streaming experience.
package claude
import (
"bytes"
"context"
"fmt"
"strings"
"sync/atomic"
"time"
translatorcommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/common"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
// Params holds parameters for response conversion and maintains state across streaming chunks.
// This structure tracks the current state of the response translation process to ensure
// proper sequencing of SSE events and transitions between different content types.
type Params struct {
	HasFirstResponse bool // Indicates if the initial message_start event has been sent
	ResponseType     int  // Current response type: 0=none, 1=content, 2=thinking, 3=function
	ResponseIndex    int  // Index counter for content blocks in the streaming response
	HasContent       bool // Tracks whether any content (text, thinking, or tool use) has been output
	// Reverse map: sanitized Gemini function name → original Claude tool name.
	ToolNameMap map[string]string
	// HasToolUse is set once any functionCall part has been translated into a
	// tool_use block. It persists across chunks so the final message_delta can
	// report stop_reason "tool_use" even when the function call and the
	// finishReason arrive in different chunks.
	HasToolUse bool
}

// toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
var toolUseIDCounter uint64

// ConvertGeminiCLIResponseToClaude translates one streaming chunk of a Gemini CLI
// response into Claude-style Server-Sent Events.
//
// The function is called once per upstream chunk. State that must survive between
// calls (whether message_start was sent, which content block is open, the current
// block index, whether a tool was used) lives in a *Params stored behind param.
//
// Response type states: 0=none, 1=content, 2=thinking, 3=function.
//
// Parameters:
//   - ctx: unused.
//   - modelName: unused.
//   - originalRequestRawJSON: the original client request; passed to
//     util.SanitizedToolNameMap on the first call to build the reverse tool-name map.
//   - requestRawJSON: unused.
//   - rawJSON: the raw chunk from the Gemini CLI API, or the literal "[DONE]".
//   - param: holder for the *Params state; initialised here when *param is nil.
//
// Returns:
//   - [][]byte: for a regular chunk, a single element holding every SSE event
//     produced for that chunk (possibly empty). For "[DONE]", a single
//     message_stop event if any content was emitted, otherwise an empty slice.
func ConvertGeminiCLIResponseToClaude(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) [][]byte {
	// Named values for Params.ResponseType; kept local so no new package-level
	// identifiers are introduced.
	const (
		stateNone     = 0
		stateText     = 1
		stateThinking = 2
		stateToolUse  = 3
	)

	if *param == nil {
		*param = &Params{
			ToolNameMap: util.SanitizedToolNameMap(originalRequestRawJSON),
		}
	}
	st := (*param).(*Params)

	if bytes.Equal(rawJSON, []byte("[DONE]")) {
		// Only send message_stop if we have actually output content.
		if st.HasContent {
			return [][]byte{translatorcommon.AppendSSEEventString(nil, "message_stop", `{"type":"message_stop"}`, 3)}
		}
		return [][]byte{}
	}

	output := make([]byte, 0, 1024)
	appendEvent := func(event, payload string) {
		output = translatorcommon.AppendSSEEventString(output, event, payload, 3)
	}

	// closeOpenBlock emits content_block_stop for the currently open block (if any)
	// and advances the block index. The caller sets the new ResponseType afterwards.
	closeOpenBlock := func() {
		if st.ResponseType == stateNone {
			return
		}
		appendEvent("content_block_stop", fmt.Sprintf(`{"type":"content_block_stop","index":%d}`, st.ResponseIndex))
		st.ResponseIndex++
	}

	// emitTextDelta appends a text or thinking delta, opening a new block of that
	// kind first when the stream is not already inside one. kind is "text" or
	// "thinking"; in the Claude event shapes it serves as the block type, the
	// delta field name and the prefix of the "<kind>_delta" delta type.
	emitTextDelta := func(state int, kind, content string) {
		if st.ResponseType != state {
			closeOpenBlock()
			appendEvent("content_block_start", fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"%s","%s":""}}`, st.ResponseIndex, kind, kind))
			st.ResponseType = state
		}
		// sjson handles JSON escaping of the content. The error is ignored, as
		// throughout this translator: the template and path are constants.
		delta, _ := sjson.SetBytes([]byte(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"%s_delta","%s":""}}`, st.ResponseIndex, kind, kind)), "delta."+kind, content)
		appendEvent("content_block_delta", string(delta))
		st.HasContent = true
	}

	// message_start is sent exactly once, on the first chunk of the stream.
	if !st.HasFirstResponse {
		// Placeholder id/model are overridden below when the chunk carries real metadata.
		start := []byte(`{"type":"message_start","message":{"id":"msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY","type":"message","role":"assistant","content":[],"model":"claude-3-5-sonnet-20241022","stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}}`)
		if v := gjson.GetBytes(rawJSON, "response.modelVersion"); v.Exists() {
			start, _ = sjson.SetBytes(start, "message.model", v.String())
		}
		if v := gjson.GetBytes(rawJSON, "response.responseId"); v.Exists() {
			start, _ = sjson.SetBytes(start, "message.id", v.String())
		}
		appendEvent("message_start", string(start))
		st.HasFirstResponse = true
	}

	// Each part carries either text (regular or thought) or a function call.
	if parts := gjson.GetBytes(rawJSON, "response.candidates.0.content.parts"); parts.IsArray() {
		for _, part := range parts.Array() {
			if text := part.Get("text"); text.Exists() {
				if part.Get("thought").Bool() {
					emitTextDelta(stateThinking, "thinking", text.String())
				} else {
					emitTextDelta(stateText, "text", text.String())
				}
				continue
			}

			call := part.Get("functionCall")
			if !call.Exists() {
				continue
			}

			// Remember tool usage for the whole stream, not just this chunk.
			st.HasToolUse = true
			name := util.RestoreSanitizedToolName(st.ToolNameMap, call.Get("name").String())

			// Every function call gets its own tool_use block, so any open block
			// (including a previous tool_use block) is closed first.
			closeOpenBlock()

			blockStart := []byte(fmt.Sprintf(`{"type":"content_block_start","index":%d,"content_block":{"type":"tool_use","id":"","name":"","input":{}}}`, st.ResponseIndex))
			// The id combines name, wall-clock nanoseconds and a process-wide counter.
			blockStart, _ = sjson.SetBytes(blockStart, "content_block.id", util.SanitizeClaudeToolID(fmt.Sprintf("%s-%d-%d", name, time.Now().UnixNano(), atomic.AddUint64(&toolUseIDCounter, 1))))
			blockStart, _ = sjson.SetBytes(blockStart, "content_block.name", name)
			appendEvent("content_block_start", string(blockStart))

			if args := call.Get("args"); args.Exists() {
				// The raw args JSON is sent as one partial_json string.
				delta, _ := sjson.SetBytes([]byte(fmt.Sprintf(`{"type":"content_block_delta","index":%d,"delta":{"type":"input_json_delta","partial_json":""}}`, st.ResponseIndex)), "delta.partial_json", args.Raw)
				appendEvent("content_block_delta", string(delta))
			}
			st.ResponseType = stateToolUse
			st.HasContent = true
		}
	}

	// The closing events are emitted on the chunk that carries both usage metadata
	// and a finishReason, and only if some content was actually produced.
	usage := gjson.GetBytes(rawJSON, "response.usageMetadata")
	if usage.Exists() && bytes.Contains(rawJSON, []byte(`"finishReason"`)) {
		if candidates := usage.Get("candidatesTokenCount"); candidates.Exists() && st.HasContent {
			// Close the final content block.
			appendEvent("content_block_stop", fmt.Sprintf(`{"type":"content_block_stop","index":%d}`, st.ResponseIndex))

			stopReason := "end_turn"
			if st.HasToolUse {
				stopReason = "tool_use"
			} else if finish := gjson.GetBytes(rawJSON, "response.candidates.0.finishReason"); finish.Exists() && finish.String() == "MAX_TOKENS" {
				stopReason = "max_tokens"
			}

			msgDelta := []byte(fmt.Sprintf(`{"type":"message_delta","delta":{"stop_reason":"%s","stop_sequence":null},"usage":{"input_tokens":0,"output_tokens":0}}`, stopReason))
			// Thinking tokens are counted as output tokens.
			msgDelta, _ = sjson.SetBytes(msgDelta, "usage.output_tokens", candidates.Int()+usage.Get("thoughtsTokenCount").Int())
			msgDelta, _ = sjson.SetBytes(msgDelta, "usage.input_tokens", usage.Get("promptTokenCount").Int())
			appendEvent("message_delta", string(msgDelta))
		}
	}

	return [][]byte{output}
}
// ConvertGeminiCLIResponseToClaudeNonStream builds a single Claude-style message
// JSON document from a complete (non-streaming) Gemini CLI response.
//
// Consecutive parts of the same kind (text or thought) are merged into one
// content block; every functionCall part becomes its own tool_use block with a
// sequential "tool_N" id. Parts with empty text are skipped.
//
// Parameters:
//   - ctx: unused.
//   - modelName: unused.
//   - originalRequestRawJSON: the original client request, used to build the
//     reverse tool-name map via util.SanitizedToolNameMap.
//   - requestRawJSON: unused.
//   - rawJSON: the raw JSON response from the Gemini CLI API.
//   - param: unused.
//
// Returns:
//   - []byte: the Claude-compatible JSON message. The "usage" object is removed
//     when the response carries no usageMetadata at all.
func ConvertGeminiCLIResponseToClaudeNonStream(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, _ *any) []byte {
	nameLookup := util.SanitizedToolNameMap(originalRequestRawJSON)
	resp := gjson.ParseBytes(rawJSON)

	usageMeta := resp.Get("response.usageMetadata")
	promptTokens := usageMeta.Get("promptTokenCount").Int()
	// Thought tokens are folded into the output token count.
	completionTokens := usageMeta.Get("candidatesTokenCount").Int() + usageMeta.Get("thoughtsTokenCount").Int()

	msg := []byte(`{"id":"","type":"message","role":"assistant","model":"","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":0,"output_tokens":0}}`)
	msg, _ = sjson.SetBytes(msg, "id", resp.Get("response.responseId").String())
	msg, _ = sjson.SetBytes(msg, "model", resp.Get("response.modelVersion").String())
	msg, _ = sjson.SetBytes(msg, "usage.input_tokens", promptTokens)
	msg, _ = sjson.SetBytes(msg, "usage.output_tokens", completionTokens)

	var (
		pending     strings.Builder // accumulated text for the block currently being built
		pendingKind string          // "text" or "thinking"; kind of the content held in pending
		toolCalls   int             // number of tool_use blocks emitted so far
	)

	// flush appends the accumulated text as a content block of pendingKind.
	// The kind doubles as the block's "type" and as the name of its text field.
	flush := func() {
		if pending.Len() == 0 {
			return
		}
		block := []byte(`{"type":"` + pendingKind + `","` + pendingKind + `":""}`)
		block, _ = sjson.SetBytes(block, pendingKind, pending.String())
		msg, _ = sjson.SetRawBytes(msg, "content.-1", block)
		pending.Reset()
	}

	// accumulate adds content to the pending block, flushing first on a kind change.
	accumulate := func(kind, content string) {
		if kind != pendingKind {
			flush()
			pendingKind = kind
		}
		pending.WriteString(content)
	}

	if parts := resp.Get("response.candidates.0.content.parts"); parts.IsArray() {
		items := parts.Array()
		for idx := range items {
			part := items[idx]

			if text := part.Get("text"); text.Exists() && text.String() != "" {
				kind := "text"
				if part.Get("thought").Bool() {
					kind = "thinking"
				}
				accumulate(kind, text.String())
				continue
			}

			call := part.Get("functionCall")
			if !call.Exists() {
				continue
			}

			// A tool call terminates whatever text/thinking block was in progress.
			flush()
			toolCalls++

			toolBlock := []byte(`{"type":"tool_use","id":"","name":"","input":{}}`)
			toolBlock, _ = sjson.SetBytes(toolBlock, "id", fmt.Sprintf("tool_%d", toolCalls))
			toolBlock, _ = sjson.SetBytes(toolBlock, "name", util.RestoreSanitizedToolName(nameLookup, call.Get("name").String()))

			// Only a valid JSON object is accepted as input; anything else becomes {}.
			input := "{}"
			if args := call.Get("args"); args.Exists() && gjson.Valid(args.Raw) && args.IsObject() {
				input = args.Raw
			}
			toolBlock, _ = sjson.SetRawBytes(toolBlock, "input", []byte(input))
			msg, _ = sjson.SetRawBytes(msg, "content.-1", toolBlock)
		}
	}
	flush()

	// A tool call takes precedence; otherwise only MAX_TOKENS maps to something
	// other than end_turn.
	var stopReason string
	switch {
	case toolCalls > 0:
		stopReason = "tool_use"
	case resp.Get("response.candidates.0.finishReason").String() == "MAX_TOKENS":
		stopReason = "max_tokens"
	default:
		stopReason = "end_turn"
	}
	msg, _ = sjson.SetBytes(msg, "stop_reason", stopReason)

	// Drop the usage object entirely when the upstream response had none.
	if !usageMeta.Exists() && promptTokens == 0 && completionTokens == 0 {
		msg, _ = sjson.DeleteBytes(msg, "usage")
	}
	return msg
}
// ClaudeTokenCount returns the token-count response body for the given count by
// delegating to translatorcommon.ClaudeInputTokensJSON. The context is unused.
// NOTE(review): the exact JSON shape is defined by that helper — presumably a
// Claude count_tokens-style {"input_tokens": N} object; confirm there.
func ClaudeTokenCount(_ context.Context, count int64) []byte {
	return translatorcommon.ClaudeInputTokensJSON(count)
}