Merge pull request #621 from dacsang97/fix/antigravity-prompt-caching
Antigravity Prompt Caching Fix
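In short: the Gemini-style upstream reports prompt-cache hits in usageMetadata.cachedContentTokenCount, but the Claude- and OpenAI-format translators dropped that field, so clients could not tell that prompt caching was working. The diff below carries the value through as usage.cache_read_input_tokens (Anthropic format) and usage.prompt_tokens_details.cached_tokens (OpenAI format), and guards the shared rand source with a mutex. A minimal, self-contained sketch of the field mapping follows; it uses hypothetical token counts and is not the repository's actual code:

// Sketch only: how cachedContentTokenCount maps to the Claude- and OpenAI-format
// usage blocks, using the same gjson/sjson calls as the diff below.
package main

import (
	"fmt"

	"github.com/tidwall/gjson"
	"github.com/tidwall/sjson"
)

func main() {
	// Hypothetical upstream usage metadata from the Antigravity/Gemini response.
	upstream := `{"usageMetadata":{"promptTokenCount":1200,"candidatesTokenCount":80,"cachedContentTokenCount":1024}}`
	cached := gjson.Get(upstream, "usageMetadata.cachedContentTokenCount").Int()

	claude := `{"usage":{"input_tokens":1200,"output_tokens":80}}`
	openai := `{"usage":{"prompt_tokens":1200,"completion_tokens":80}}`
	if cached > 0 {
		// Anthropic-format clients read cache hits from cache_read_input_tokens.
		claude, _ = sjson.Set(claude, "usage.cache_read_input_tokens", cached)
		// OpenAI-format clients read them from prompt_tokens_details.cached_tokens.
		openai, _ = sjson.Set(openai, "usage.prompt_tokens_details.cached_tokens", cached)
	}
	fmt.Println(claude)
	fmt.Println(openai)
}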
@@ -17,6 +17,7 @@ import (
 	"net/url"
 	"strconv"
 	"strings"
+	"sync"
 	"time"

 	"github.com/google/uuid"
@@ -41,12 +42,15 @@ const (
 	antigravityModelsPath   = "/v1internal:fetchAvailableModels"
 	antigravityClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
 	antigravityClientSecret = "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"
-	defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
+	defaultAntigravityAgent = "antigravity/1.104.0 darwin/arm64"
 	antigravityAuthType     = "antigravity"
 	refreshSkew             = 3000 * time.Second
 )

-var randSource = rand.New(rand.NewSource(time.Now().UnixNano()))
+var (
+	randSource      = rand.New(rand.NewSource(time.Now().UnixNano()))
+	randSourceMutex sync.Mutex
+)

 // AntigravityExecutor proxies requests to the antigravity upstream.
 type AntigravityExecutor struct {
@@ -1224,7 +1228,9 @@ func generateRequestID() string {
 }

 func generateSessionID() string {
+	randSourceMutex.Lock()
 	n := randSource.Int63n(9_000_000_000_000_000_000)
+	randSourceMutex.Unlock()
 	return "-" + strconv.FormatInt(n, 10)
 }
@@ -1248,8 +1254,10 @@ func generateStableSessionID(payload []byte) string {
 func generateProjectID() string {
 	adjectives := []string{"useful", "bright", "swift", "calm", "bold"}
 	nouns := []string{"fuze", "wave", "spark", "flow", "core"}
+	randSourceMutex.Lock()
 	adj := adjectives[randSource.Intn(len(adjectives))]
 	noun := nouns[randSource.Intn(len(nouns))]
+	randSourceMutex.Unlock()
 	randomPart := strings.ToLower(uuid.NewString())[:5]
 	return adj + "-" + noun + "-" + randomPart
 }
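Background on the mutex (not part of the diff): a *rand.Rand returned by rand.New is not safe for concurrent use, unlike the package-level math/rand functions, so concurrent requests calling generateSessionID or generateProjectID could race on randSource. A minimal sketch of the guarded pattern, where randInt63n is a hypothetical helper; the diff inlines the same Lock/Unlock pair instead:

// Sketch of the locking pattern applied above.
package main

import (
	"fmt"
	"math/rand"
	"sync"
	"time"
)

var (
	src   = rand.New(rand.NewSource(time.Now().UnixNano())) // not concurrency-safe on its own
	srcMu sync.Mutex
)

// randInt63n serializes access to the shared source.
func randInt63n(n int64) int64 {
	srcMu.Lock()
	defer srcMu.Unlock()
	return src.Int63n(n)
}

func main() {
	fmt.Println(randInt63n(9_000_000_000_000_000_000))
}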
@@ -35,6 +35,7 @@ type Params struct {
 	CandidatesTokenCount int64 // Cached candidate token count from usage metadata
 	ThoughtsTokenCount   int64 // Cached thinking token count from usage metadata
 	TotalTokenCount      int64 // Cached total token count from usage metadata
+	CachedTokenCount     int64 // Cached content token count (indicates prompt caching)
 	HasSentFinalEvents   bool  // Indicates if final content/message events have been sent
 	HasToolUse           bool  // Indicates if tool use was observed in the stream
 	HasContent           bool  // Tracks whether any content (text, thinking, or tool use) has been output
@@ -274,6 +275,7 @@ func ConvertAntigravityResponseToClaude(_ context.Context, _ string, originalReq
 		params.CandidatesTokenCount = usageResult.Get("candidatesTokenCount").Int()
 		params.ThoughtsTokenCount = usageResult.Get("thoughtsTokenCount").Int()
 		params.TotalTokenCount = usageResult.Get("totalTokenCount").Int()
+		params.CachedTokenCount = usageResult.Get("cachedContentTokenCount").Int()
 		if params.CandidatesTokenCount == 0 && params.TotalTokenCount > 0 {
 			params.CandidatesTokenCount = params.TotalTokenCount - params.PromptTokenCount - params.ThoughtsTokenCount
 			if params.CandidatesTokenCount < 0 {
@@ -322,6 +324,14 @@ func appendFinalEvents(params *Params, output *string, force bool) {
 	*output = *output + "event: message_delta\n"
 	*output = *output + "data: "
 	delta := fmt.Sprintf(`{"type":"message_delta","delta":{"stop_reason":"%s","stop_sequence":null},"usage":{"input_tokens":%d,"output_tokens":%d}}`, stopReason, params.PromptTokenCount, usageOutputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working)
+	if params.CachedTokenCount > 0 {
+		var err error
+		delta, err = sjson.Set(delta, "usage.cache_read_input_tokens", params.CachedTokenCount)
+		if err != nil {
+			log.Warnf("antigravity claude response: failed to set cache_read_input_tokens: %v", err)
+		}
+	}
 	*output = *output + delta + "\n\n\n"

 	params.HasSentFinalEvents = true
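With a cache hit, the final streamed message_delta event would then look roughly like this (stop reason and token counts are hypothetical):

event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":1200,"output_tokens":80,"cache_read_input_tokens":1024}}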
@@ -361,6 +371,7 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 	candidateTokens := root.Get("response.usageMetadata.candidatesTokenCount").Int()
 	thoughtTokens := root.Get("response.usageMetadata.thoughtsTokenCount").Int()
 	totalTokens := root.Get("response.usageMetadata.totalTokenCount").Int()
+	cachedTokens := root.Get("response.usageMetadata.cachedContentTokenCount").Int()
 	outputTokens := candidateTokens + thoughtTokens
 	if outputTokens == 0 && totalTokens > 0 {
 		outputTokens = totalTokens - promptTokens
@@ -374,6 +385,14 @@ func ConvertAntigravityResponseToClaudeNonStream(_ context.Context, _ string, or
 	responseJSON, _ = sjson.Set(responseJSON, "model", root.Get("response.modelVersion").String())
 	responseJSON, _ = sjson.Set(responseJSON, "usage.input_tokens", promptTokens)
 	responseJSON, _ = sjson.Set(responseJSON, "usage.output_tokens", outputTokens)
+	// Add cache_read_input_tokens if cached tokens are present (indicates prompt caching is working)
+	if cachedTokens > 0 {
+		var err error
+		responseJSON, err = sjson.Set(responseJSON, "usage.cache_read_input_tokens", cachedTokens)
+		if err != nil {
+			log.Warnf("antigravity claude response: failed to set cache_read_input_tokens: %v", err)
+		}
+	}

 	contentArrayInitialized := false
 	ensureContentArray := func() {
@@ -13,6 +13,8 @@ import (
 	"sync/atomic"
 	"time"

+	log "github.com/sirupsen/logrus"
+
 	. "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/openai/chat-completions"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
@@ -93,10 +95,19 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("antigravity openai response: failed to set cached_tokens: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.
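For the OpenAI-format path, with hypothetical counts promptTokenCount=1200, thoughtsTokenCount=80 and cachedContentTokenCount=1024, the usage fields touched by this hunk would come out roughly as:

"usage": {"prompt_tokens": 1280, "completion_tokens_details": {"reasoning_tokens": 80}, "prompt_tokens_details": {"cached_tokens": 1024}}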
@@ -13,6 +13,7 @@ import (
 	"sync/atomic"
 	"time"

+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -96,10 +97,19 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("gemini openai response: failed to set cached_tokens in streaming: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.
@@ -240,10 +250,19 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
 		}
 		promptTokenCount := usageResult.Get("promptTokenCount").Int()
 		thoughtsTokenCount := usageResult.Get("thoughtsTokenCount").Int()
+		cachedTokenCount := usageResult.Get("cachedContentTokenCount").Int()
 		template, _ = sjson.Set(template, "usage.prompt_tokens", promptTokenCount+thoughtsTokenCount)
 		if thoughtsTokenCount > 0 {
 			template, _ = sjson.Set(template, "usage.completion_tokens_details.reasoning_tokens", thoughtsTokenCount)
 		}
+		// Include cached token count if present (indicates prompt caching is working)
+		if cachedTokenCount > 0 {
+			var err error
+			template, err = sjson.Set(template, "usage.prompt_tokens_details.cached_tokens", cachedTokenCount)
+			if err != nil {
+				log.Warnf("gemini openai response: failed to set cached_tokens in non-streaming: %v", err)
+			}
+		}
 	}

 	// Process the main content part of the response.