diff --git a/internal/runtime/executor/antigravity_executor.go b/internal/runtime/executor/antigravity_executor.go
index 356c9320..2b4ec748 100644
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -17,6 +17,7 @@ import (
 	"net/url"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/google/uuid"
@@ -41,12 +42,15 @@ const (
 	antigravityModelsPath   = "/v1internal:fetchAvailableModels"
 	antigravityClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
 	antigravityClientSecret = "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"
-	defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
+	defaultAntigravityAgent = "antigravity/1.104.0 darwin/arm64"
 	antigravityAuthType     = "antigravity"
 	refreshSkew             = 3000 * time.Second
 )
 
-var randSource = rand.New(rand.NewSource(time.Now().UnixNano()))
+var (
+	randSource      = rand.New(rand.NewSource(time.Now().UnixNano()))
+	randSourceMutex sync.Mutex
+)
 
 // AntigravityExecutor proxies requests to the antigravity upstream.
 type AntigravityExecutor struct {
@@ -1224,7 +1228,9 @@ func generateRequestID() string {
 }
 
 func generateSessionID() string {
+	randSourceMutex.Lock()
 	n := randSource.Int63n(9_000_000_000_000_000_000)
+	randSourceMutex.Unlock()
 	return "-" + strconv.FormatInt(n, 10)
 }
 
@@ -1248,8 +1254,10 @@ func generateStableSessionID(payload []byte) string {
 func generateProjectID() string {
 	adjectives := []string{"useful", "bright", "swift", "calm", "bold"}
 	nouns := []string{"fuze", "wave", "spark", "flow", "core"}
+	randSourceMutex.Lock()
 	adj := adjectives[randSource.Intn(len(adjectives))]
 	noun := nouns[randSource.Intn(len(nouns))]
+	randSourceMutex.Unlock()
 	randomPart := strings.ToLower(uuid.NewString())[:5]
 	return adj + "-" + noun + "-" + randomPart
 }
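Context for the executor change: a `*rand.Rand` built with `rand.New(rand.NewSource(...))` is not safe for concurrent use (only the top-level `math/rand` functions use a concurrency-safe source), so concurrent calls to `generateSessionID` or `generateProjectID` could race on the shared source. A minimal standalone sketch of the guarded pattern, runnable under `go run -race`; the goroutine count is arbitrary and the file is not part of the patch:

```go
package main

import (
	"fmt"
	"math/rand"
	"strconv"
	"sync"
	"time"
)

var (
	// Shared seeded source; *rand.Rand itself provides no locking.
	randSource      = rand.New(rand.NewSource(time.Now().UnixNano()))
	randSourceMutex sync.Mutex
)

// generateSessionID mirrors the patched executor: the lock covers only
// the draw from the shared source, keeping the critical section small.
func generateSessionID() string {
	randSourceMutex.Lock()
	n := randSource.Int63n(9_000_000_000_000_000_000)
	randSourceMutex.Unlock()
	return "-" + strconv.FormatInt(n, 10)
}

func main() {
	var wg sync.WaitGroup
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			_ = generateSessionID() // clean under -race with the mutex held
		}()
	}
	wg.Wait()
	fmt.Println(generateSessionID())
}
```

A lock-free alternative would be the top-level `rand.Int63n`/`rand.Intn`, whose default source is already safe for concurrent use, at the cost of giving up the dedicated seeded source.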
diff --git a/internal/translator/antigravity/claude/antigravity_claude_response.go b/internal/translator/antigravity/claude/antigravity_claude_response.go
index ea4ba38e..80660378 100644
--- a/internal/translator/antigravity/claude/antigravity_claude_response.go
+++ b/internal/translator/antigravity/claude/antigravity_claude_response.go
@@ -35,6 +35,7 @@ type Params struct {
 	CandidatesTokenCount int64 // Cached candidate token count from usage metadata
 	ThoughtsTokenCount   int64 // Cached thinking token count from usage metadata
 	TotalTokenCount      int64 // Cached total token count from usage metadata
+	CachedTokenCount     int64 // Cached content token count (indicates prompt caching)
 	HasSentFinalEvents   bool  // Indicates if final content/message events have been sent
 	HasToolUse           bool  // Indicates if tool use was observed in the stream
 	HasContent           bool  // Tracks whether any content (text, thinking, or tool use) has been output
@@ -274,6 +275,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 		params.CandidatesTokenCount = usageResult.Get("candidatesTokenCount").Int()
 		params.ThoughtsTokenCount = usageResult.Get("thoughtsTokenCount").Int()
 		params.TotalTokenCount = usageResult.Get("totalTokenCount").Int()
+		params.CachedTokenCount = usageResult.Get("cachedContentTokenCount").Int()
 		if params.CandidatesTokenCount == 0 && params.TotalTokenCount > 0 {
 			params.CandidatesTokenCount = params.TotalTokenCount - params.PromptTokenCount - params.ThoughtsTokenCount
 			if params.CandidatesTokenCount < 0 {
@@ -322,6 +324,14 @@ func appendFinalEvents(params *Params, output *string, force bool) {
 	*output = *output + "event: message_delta\n"
 	*output = *output + "data: "
 	delta := fmt.Sprintf(`{"type":"message_delta","delta":{"stop_reason":"%s","stop_sequence":null},"usage":{"input_tokens":%d,"output_tokens":%d}}`, stopReason, params.PromptTokenCount, usageOutputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working)
+	if params.CachedTokenCount > 0 {
+		var err error
+		delta, err = sjson.Set(delta, "usage.cache_read_input_tokens", params.CachedTokenCount)
+		if err != nil {
+			log.Warnf("antigravity claude response: failed to set cache_read_input_tokens: %v", err)
+		}
+	}
 	*output = *output + delta + "\n\n\n"
 
 	params.HasSentFinalEvents = true
@@ -361,6 +371,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 	candidateTokens := root.Get("response.usageMetadata.candidatesTokenCount").Int()
 	thoughtTokens := root.Get("response.usageMetadata.thoughtsTokenCount").Int()
 	totalTokens := root.Get("response.usageMetadata.totalTokenCount").Int()
+	cachedTokens := root.Get("response.usageMetadata.cachedContentTokenCount").Int()
 	outputTokens := candidateTokens + thoughtTokens
 	if outputTokens == 0 && totalTokens > 0 {
 		outputTokens = totalTokens - promptTokens
@@ -374,6 +385,14 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 	responseJSON, _ = sjson.Set(responseJSON, "model", root.Get("response.modelVersion").String())
 	responseJSON, _ = sjson.Set(responseJSON, "usage.input_tokens", promptTokens)
 	responseJSON, _ = sjson.Set(responseJSON, "usage.output_tokens", outputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working)
+	if cachedTokens > 0 {
+		var err error
+		responseJSON, err = sjson.Set(responseJSON, "usage.cache_read_input_tokens", cachedTokens)
+		if err != nil {
+			log.Warnf("antigravity claude response: failed to set cache_read_input_tokens: %v", err)
+		}
+	}
 
 	contentArrayInitialized := false
 	ensureContentArray := func() {
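To make the streaming change concrete, here is a standalone sketch of the `message_delta` usage mutation shown above; the token counts and stop reason are hypothetical, and the field is only emitted when the upstream reported a non-zero `cachedContentTokenCount`:

```go
package main

import (
	"fmt"

	"github.com/tidwall/sjson"
)

func main() {
	// Base delta as built by fmt.Sprintf in appendFinalEvents (example values).
	delta := `{"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":1200,"output_tokens":85}}`

	// Mirror the patched branch: add the Claude cache-read field only when present.
	cached := int64(1024) // hypothetical cachedContentTokenCount from upstream
	if cached > 0 {
		var err error
		delta, err = sjson.Set(delta, "usage.cache_read_input_tokens", cached)
		if err != nil {
			panic(err)
		}
	}
	fmt.Println(delta)
	// usage now reads: {"input_tokens":1200,"output_tokens":85,"cache_read_input_tokens":1024}
}
```

Gating on `cached > 0` keeps the usage payload unchanged for clients when no prompt caching occurred, rather than emitting a noisy zero-valued field.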
diff --git a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
index b699a4a0..7282ebc8 100644
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
@@ -13,6 +13,8 @@ import (
 	"sync/atomic"
 	"time"
 
+	log "github.com/sirupsen/logrus"
+
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -93,10 +95,19 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("antigravity openai response: failed to set cached_tokens: %v", err)
+			}
+		}
 	}
 
 	// Process the main content part of the response.
diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
index 3c615b54..d710b1d6 100644
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -13,6 +13,7 @@ import (
 	"sync/atomic"
 	"time"
 
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -96,10 +97,19 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("gemini openai response: failed to set cached_tokens in streaming: %v", err)
+			}
+		}
 	}
 
 	// Process the main content part of the response.
@@ -240,10 +250,19 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("gemini openai response: failed to set cached_tokens in non-streaming: %v", err)
+			}
+		}
 	}
 
 	// Process the main content part of the response.
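The two OpenAI-format translators (antigravity and gemini, streaming and non-streaming) apply the same usage mapping, so one sketch covers all of them. The `usageMetadata` payload and `template` skeleton below are hypothetical stand-ins for what the upstream and the translator provide:

```go
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	// Hypothetical Gemini-style usage metadata from the upstream response.
	usage := `{"promptTokenCount":900,"thoughtsTokenCount":40,"cachedContentTokenCount":512,"candidatesTokenCount":120}`
	// Hypothetical OpenAI-format response template being filled in.
	template := `{"object":"chat.completion.chunk","usage":{}}`

	prompt := gjson.Get(usage, "promptTokenCount").Int()
	thoughts := gjson.Get(usage, "thoughtsTokenCount").Int()
	cached := gjson.Get(usage, "cachedContentTokenCount").Int()

	// Same mapping as the patched translators: thinking tokens fold into
	// prompt_tokens, reasoning and cached counts go into the details objects.
	template, _ = sjson.Set(template, "usage.prompt_tokens", prompt+thoughts)
	if thoughts > 0 {
		template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughts)
	}
	if cached > 0 {
		template, _ = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cached)
	}
	fmt.Println(template)
	// usage.prompt_tokens_details.cached_tokens mirrors OpenAI's field for
	// prompt-cache hits, so clients can track cache effectiveness unchanged.
}
```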