fix(antigravity): refine 429 handling and credits fallback

Includes: restore SDK docs under docs/; update antigravity executor credits tests; gofmt.
This commit is contained in:
ZTXBOSS666
2026-04-09 21:48:32 +08:00
committed by GitHub
parent 1dba2d0f81
commit 30e94b6792
3 changed files with 449 additions and 96 deletions
-1
View File
@@ -109,7 +109,6 @@ enable-gemini-cli-endpoint: false
# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts. # When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
nonstream-keepalive-interval: 0 nonstream-keepalive-interval: 0
# Streaming behavior (SSE keep-alives + safe bootstrap retries). # Streaming behavior (SSE keep-alives + safe bootstrap retries).
# streaming: # streaming:
# keepalive-seconds: 15 # Default: 0 (disabled). <= 0 disables keep-alives. # keepalive-seconds: 15 # Default: 0 (disabled). <= 0 disables keep-alives.
+401 -48
View File
@@ -50,22 +50,46 @@ const (
antigravityAuthType = "antigravity" antigravityAuthType = "antigravity"
refreshSkew = 3000 * time.Second refreshSkew = 3000 * time.Second
antigravityCreditsRetryTTL = 5 * time.Hour antigravityCreditsRetryTTL = 5 * time.Hour
antigravityCreditsAutoDisableDuration = 5 * time.Hour
antigravityShortQuotaCooldownThreshold = 5 * time.Minute
antigravityInstantRetryThreshold = 3 * time.Second
// systemInstruction = "You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.**Absolute paths only****Proactiveness**" // systemInstruction = "You are Antigravity, a powerful agentic AI coding assistant designed by the Google Deepmind team working on Advanced Agentic Coding.You are pair programming with a USER to solve their coding task. The task may require creating a new codebase, modifying or debugging an existing codebase, or simply answering a question.**Absolute paths only****Proactiveness**"
) )
type antigravity429Category string type antigravity429Category string
// antigravityCreditsFailureState is the per-auth record kept in
// antigravityCreditsFailureByAuth to track failures of the credits fallback.
type antigravityCreditsFailureState struct {
// Count is incremented by recordAntigravityCreditsFailure each time a failure is recorded.
Count int
// DisabledUntil is the instant until which credits stay disabled; the zero value means no timed disable is active.
DisabledUntil time.Time
// PermanentlyDisabled makes antigravityCreditsDisabled report true regardless of DisabledUntil.
PermanentlyDisabled bool
// ExplicitBalanceExhausted is set together with PermanentlyDisabled by markAntigravityCreditsPermanentlyDisabled.
ExplicitBalanceExhausted bool
}
// antigravity429DecisionKind names the handling strategy selected by decideAntigravity429 for a 429 response.
type antigravity429DecisionKind string
const ( const (
antigravity429Unknown antigravity429Category = "unknown" antigravity429Unknown antigravity429Category = "unknown"
antigravity429RateLimited antigravity429Category = "rate_limited" antigravity429RateLimited antigravity429Category = "rate_limited"
antigravity429QuotaExhausted antigravity429Category = "quota_exhausted" antigravity429QuotaExhausted antigravity429Category = "quota_exhausted"
antigravity429SoftRateLimit antigravity429Category = "soft_rate_limit"
antigravity429DecisionSoftRetry antigravity429DecisionKind = "soft_retry"
antigravity429DecisionInstantRetrySameAuth antigravity429DecisionKind = "instant_retry_same_auth"
antigravity429DecisionShortCooldownSwitchAuth antigravity429DecisionKind = "short_cooldown_switch_auth"
antigravity429DecisionFullQuotaExhausted antigravity429DecisionKind = "full_quota_exhausted"
) )
// antigravity429Decision is the result produced by decideAntigravity429 for a 429 response body.
type antigravity429Decision struct {
// kind is the selected handling strategy.
kind antigravity429DecisionKind
// retryAfter is the delay returned by parseRetryDelay for the body, or nil when none was parsed.
retryAfter *time.Duration
// reason is "quota_exhausted" on a keyword match, otherwise the trimmed reason of the last inspected ErrorInfo detail (may be empty).
reason string
}
var ( var (
randSource = rand.New(rand.NewSource(time.Now().UnixNano())) randSource = rand.New(rand.NewSource(time.Now().UnixNano()))
randSourceMutex sync.Mutex randSourceMutex sync.Mutex
antigravityCreditsExhaustedByAuth sync.Map antigravityCreditsFailureByAuth sync.Map
antigravityPreferCreditsByModel sync.Map antigravityPreferCreditsByModel sync.Map
antigravityShortCooldownByAuth sync.Map
antigravityQuotaExhaustedKeywords = []string{ antigravityQuotaExhaustedKeywords = []string{
"quota_exhausted", "quota_exhausted",
"quota exhausted", "quota exhausted",
@@ -229,36 +253,77 @@ func injectEnabledCreditTypes(payload []byte) []byte {
} }
func classifyAntigravity429(body []byte) antigravity429Category { func classifyAntigravity429(body []byte) antigravity429Category {
if len(body) == 0 { switch decideAntigravity429(body).kind {
case antigravity429DecisionInstantRetrySameAuth, antigravity429DecisionShortCooldownSwitchAuth:
return antigravity429RateLimited
case antigravity429DecisionFullQuotaExhausted:
return antigravity429QuotaExhausted
case antigravity429DecisionSoftRetry:
return antigravity429SoftRateLimit
default:
return antigravity429Unknown return antigravity429Unknown
} }
}
func decideAntigravity429(body []byte) antigravity429Decision {
decision := antigravity429Decision{kind: antigravity429DecisionSoftRetry}
if len(body) == 0 {
return decision
}
if retryAfter, parseErr := parseRetryDelay(body); parseErr == nil && retryAfter != nil {
decision.retryAfter = retryAfter
}
lowerBody := strings.ToLower(string(body)) lowerBody := strings.ToLower(string(body))
for _, keyword := range antigravityQuotaExhaustedKeywords { for _, keyword := range antigravityQuotaExhaustedKeywords {
if strings.Contains(lowerBody, keyword) { if strings.Contains(lowerBody, keyword) {
return antigravity429QuotaExhausted decision.kind = antigravity429DecisionFullQuotaExhausted
decision.reason = "quota_exhausted"
return decision
} }
} }
status := strings.TrimSpace(gjson.GetBytes(body, "error.status").String()) status := strings.TrimSpace(gjson.GetBytes(body, "error.status").String())
if !strings.EqualFold(status, "RESOURCE_EXHAUSTED") { if !strings.EqualFold(status, "RESOURCE_EXHAUSTED") {
return antigravity429Unknown return decision
} }
details := gjson.GetBytes(body, "error.details") details := gjson.GetBytes(body, "error.details")
if !details.Exists() || !details.IsArray() { if !details.Exists() || !details.IsArray() {
return antigravity429Unknown decision.kind = antigravity429DecisionSoftRetry
return decision
} }
for _, detail := range details.Array() { for _, detail := range details.Array() {
if detail.Get("@type").String() != "type.googleapis.com/google.rpc.ErrorInfo" { if detail.Get("@type").String() != "type.googleapis.com/google.rpc.ErrorInfo" {
continue continue
} }
reason := strings.TrimSpace(detail.Get("reason").String()) reason := strings.TrimSpace(detail.Get("reason").String())
if strings.EqualFold(reason, "QUOTA_EXHAUSTED") { decision.reason = reason
return antigravity429QuotaExhausted switch {
case strings.EqualFold(reason, "QUOTA_EXHAUSTED"):
decision.kind = antigravity429DecisionFullQuotaExhausted
return decision
case strings.EqualFold(reason, "RATE_LIMIT_EXCEEDED"):
if decision.retryAfter == nil {
decision.kind = antigravity429DecisionSoftRetry
return decision
} }
if strings.EqualFold(reason, "RATE_LIMIT_EXCEEDED") { switch {
return antigravity429RateLimited case *decision.retryAfter < antigravityInstantRetryThreshold:
decision.kind = antigravity429DecisionInstantRetrySameAuth
case *decision.retryAfter < antigravityShortQuotaCooldownThreshold:
decision.kind = antigravity429DecisionShortCooldownSwitchAuth
default:
decision.kind = antigravity429DecisionFullQuotaExhausted
}
return decision
} }
} }
return antigravity429Unknown
decision.kind = antigravity429DecisionSoftRetry
return decision
} }
func antigravityHasQuotaResetDelayOrModelInfo(body []byte) bool { func antigravityHasQuotaResetDelayOrModelInfo(body []byte) bool {
@@ -287,38 +352,91 @@ func antigravityCreditsRetryEnabled(cfg *config.Config) bool {
return cfg != nil && cfg.QuotaExceeded.AntigravityCredits return cfg != nil && cfg.QuotaExceeded.AntigravityCredits
} }
func antigravityCreditsExhausted(auth *cliproxyauth.Auth, now time.Time) bool { func antigravityCreditsFailureStateForAuth(auth *cliproxyauth.Auth) (string, antigravityCreditsFailureState, bool) {
if auth == nil || strings.TrimSpace(auth.ID) == "" { if auth == nil || strings.TrimSpace(auth.ID) == "" {
return false return "", antigravityCreditsFailureState{}, false
} }
value, ok := antigravityCreditsExhaustedByAuth.Load(auth.ID) authID := strings.TrimSpace(auth.ID)
value, ok := antigravityCreditsFailureByAuth.Load(authID)
if !ok {
return authID, antigravityCreditsFailureState{}, true
}
state, ok := value.(antigravityCreditsFailureState)
if !ok {
antigravityCreditsFailureByAuth.Delete(authID)
return authID, antigravityCreditsFailureState{}, true
}
return authID, state, true
}
func antigravityCreditsDisabled(auth *cliproxyauth.Auth, now time.Time) bool {
authID, state, ok := antigravityCreditsFailureStateForAuth(auth)
if !ok { if !ok {
return false return false
} }
until, ok := value.(time.Time) if state.PermanentlyDisabled {
if !ok || until.IsZero() {
antigravityCreditsExhaustedByAuth.Delete(auth.ID)
return false
}
if !until.After(now) {
antigravityCreditsExhaustedByAuth.Delete(auth.ID)
return false
}
return true return true
}
if state.DisabledUntil.IsZero() {
return false
}
if state.DisabledUntil.After(now) {
return true
}
antigravityCreditsFailureByAuth.Delete(authID)
return false
} }
func markAntigravityCreditsExhausted(auth *cliproxyauth.Auth, now time.Time) { func recordAntigravityCreditsFailure(auth *cliproxyauth.Auth, now time.Time) {
authID, state, ok := antigravityCreditsFailureStateForAuth(auth)
if !ok {
return
}
if state.PermanentlyDisabled {
antigravityCreditsFailureByAuth.Store(authID, state)
return
}
state.Count++
state.DisabledUntil = now.Add(antigravityCreditsAutoDisableDuration)
antigravityCreditsFailureByAuth.Store(authID, state)
}
func clearAntigravityCreditsFailureState(auth *cliproxyauth.Auth) {
if auth == nil || strings.TrimSpace(auth.ID) == "" { if auth == nil || strings.TrimSpace(auth.ID) == "" {
return return
} }
antigravityCreditsExhaustedByAuth.Store(auth.ID, now.Add(antigravityCreditsRetryTTL)) antigravityCreditsFailureByAuth.Delete(strings.TrimSpace(auth.ID))
} }
func markAntigravityCreditsPermanentlyDisabled(auth *cliproxyauth.Auth) {
func clearAntigravityCreditsExhausted(auth *cliproxyauth.Auth) {
if auth == nil || strings.TrimSpace(auth.ID) == "" { if auth == nil || strings.TrimSpace(auth.ID) == "" {
return return
} }
antigravityCreditsExhaustedByAuth.Delete(auth.ID) authID := strings.TrimSpace(auth.ID)
state := antigravityCreditsFailureState{
PermanentlyDisabled: true,
ExplicitBalanceExhausted: true,
}
antigravityCreditsFailureByAuth.Store(authID, state)
}
func antigravityHasExplicitCreditsBalanceExhaustedReason(body []byte) bool {
if len(body) == 0 {
return false
}
details := gjson.GetBytes(body, "error.details")
if !details.Exists() || !details.IsArray() {
return false
}
for _, detail := range details.Array() {
if detail.Get("@type").String() != "type.googleapis.com/google.rpc.ErrorInfo" {
continue
}
reason := strings.TrimSpace(detail.Get("reason").String())
if strings.EqualFold(reason, "INSUFFICIENT_G1_CREDITS_BALANCE") {
return true
}
}
return false
} }
func antigravityPreferCreditsKey(auth *cliproxyauth.Auth, modelName string) string { func antigravityPreferCreditsKey(auth *cliproxyauth.Auth, modelName string) string {
@@ -386,7 +504,7 @@ func shouldMarkAntigravityCreditsExhausted(statusCode int, body []byte, reqErr e
if strings.Contains(lowerBody, keyword) { if strings.Contains(lowerBody, keyword) {
if keyword == "resource has been exhausted" && if keyword == "resource has been exhausted" &&
statusCode == http.StatusTooManyRequests && statusCode == http.StatusTooManyRequests &&
classifyAntigravity429(body) == antigravity429Unknown && decideAntigravity429(body).kind == antigravity429DecisionSoftRetry &&
!antigravityHasQuotaResetDelayOrModelInfo(body) { !antigravityHasQuotaResetDelayOrModelInfo(body) {
return false return false
} }
@@ -421,11 +539,23 @@ func (e *AntigravityExecutor) attemptCreditsFallback(
if !antigravityCreditsRetryEnabled(e.cfg) { if !antigravityCreditsRetryEnabled(e.cfg) {
return nil, false return nil, false
} }
if classifyAntigravity429(originalBody) != antigravity429QuotaExhausted { if decideAntigravity429(originalBody).kind != antigravity429DecisionFullQuotaExhausted {
return nil, false return nil, false
} }
now := time.Now() now := time.Now()
if antigravityCreditsExhausted(auth, now) { if shouldForcePermanentDisableCredits(originalBody) {
clearAntigravityPreferCredits(auth, modelName)
markAntigravityCreditsPermanentlyDisabled(auth)
return nil, false
}
if antigravityHasExplicitCreditsBalanceExhaustedReason(originalBody) {
clearAntigravityPreferCredits(auth, modelName)
markAntigravityCreditsPermanentlyDisabled(auth)
return nil, false
}
if antigravityCreditsDisabled(auth, now) {
return nil, false return nil, false
} }
creditsPayload := injectEnabledCreditTypes(payload) creditsPayload := injectEnabledCreditTypes(payload)
@@ -436,17 +566,21 @@ func (e *AntigravityExecutor) attemptCreditsFallback(
httpReq, errReq := e.buildRequest(ctx, auth, token, modelName, creditsPayload, stream, alt, baseURL) httpReq, errReq := e.buildRequest(ctx, auth, token, modelName, creditsPayload, stream, alt, baseURL)
if errReq != nil { if errReq != nil {
helps.RecordAPIResponseError(ctx, e.cfg, errReq) helps.RecordAPIResponseError(ctx, e.cfg, errReq)
clearAntigravityPreferCredits(auth, modelName)
recordAntigravityCreditsFailure(auth, now)
return nil, true return nil, true
} }
httpResp, errDo := httpClient.Do(httpReq) httpResp, errDo := httpClient.Do(httpReq)
if errDo != nil { if errDo != nil {
helps.RecordAPIResponseError(ctx, e.cfg, errDo) helps.RecordAPIResponseError(ctx, e.cfg, errDo)
clearAntigravityPreferCredits(auth, modelName)
recordAntigravityCreditsFailure(auth, now)
return nil, true return nil, true
} }
if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices {
retryAfter, _ := parseRetryDelay(originalBody) retryAfter, _ := parseRetryDelay(originalBody)
markAntigravityPreferCredits(auth, modelName, now, retryAfter) markAntigravityPreferCredits(auth, modelName, now, retryAfter)
clearAntigravityCreditsExhausted(auth) clearAntigravityCreditsFailureState(auth)
return httpResp, true return httpResp, true
} }
@@ -457,14 +591,60 @@ func (e *AntigravityExecutor) attemptCreditsFallback(
} }
if errRead != nil { if errRead != nil {
helps.RecordAPIResponseError(ctx, e.cfg, errRead) helps.RecordAPIResponseError(ctx, e.cfg, errRead)
clearAntigravityPreferCredits(auth, modelName)
recordAntigravityCreditsFailure(auth, now)
return nil, true return nil, true
} }
helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes)
if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { if shouldForcePermanentDisableCredits(bodyBytes) {
clearAntigravityPreferCredits(auth, modelName) clearAntigravityPreferCredits(auth, modelName)
markAntigravityCreditsExhausted(auth, now) markAntigravityCreditsPermanentlyDisabled(auth)
}
return nil, true return nil, true
}
if antigravityHasExplicitCreditsBalanceExhaustedReason(bodyBytes) {
clearAntigravityPreferCredits(auth, modelName)
markAntigravityCreditsPermanentlyDisabled(auth)
return nil, true
}
clearAntigravityPreferCredits(auth, modelName)
recordAntigravityCreditsFailure(auth, now)
return nil, true
}
// handleDirectCreditsFailure updates the credits bookkeeping of auth after a
// request that used credits directly has failed.
//
// When reqErr is non-nil and its message, read as a response body, signals an
// exhausted credits balance, the credits preference for modelName is cleared
// and credits are permanently disabled for auth. Otherwise a non-nil reqErr is
// recorded as an API response error, the preference is cleared and a regular
// (timed) credits failure is recorded.
func (e *AntigravityExecutor) handleDirectCreditsFailure(ctx context.Context, auth *cliproxyauth.Auth, modelName string, reqErr error) {
	if reqErr != nil {
		errBody := reqErrBody(reqErr)
		if shouldForcePermanentDisableCredits(errBody) || antigravityHasExplicitCreditsBalanceExhaustedReason(errBody) {
			clearAntigravityPreferCredits(auth, modelName)
			markAntigravityCreditsPermanentlyDisabled(auth)
			return
		}
		helps.RecordAPIResponseError(ctx, e.cfg, reqErr)
	}

	clearAntigravityPreferCredits(auth, modelName)
	recordAntigravityCreditsFailure(auth, time.Now())
}
// reqErrBody returns the message of reqErr as bytes so it can be inspected
// like a response body. It returns nil when reqErr is nil or its message
// consists only of whitespace; otherwise the message is returned untrimmed.
func reqErrBody(reqErr error) []byte {
	if reqErr != nil {
		if text := reqErr.Error(); strings.TrimSpace(text) != "" {
			return []byte(text)
		}
	}
	return nil
}
func shouldForcePermanentDisableCredits(body []byte) bool {
return antigravityHasExplicitCreditsBalanceExhaustedReason(body)
} }
// Execute performs a non-streaming request to the Antigravity API. // Execute performs a non-streaming request to the Antigravity API.
@@ -473,8 +653,13 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"} return resp, statusErr{code: http.StatusNotImplemented, msg: "/responses/compact not supported"}
} }
baseModel := thinking.ParseSuffix(req.Model).ModelName baseModel := thinking.ParseSuffix(req.Model).ModelName
isClaude := strings.Contains(strings.ToLower(baseModel), "claude") if inCooldown, remaining := antigravityIsInShortCooldown(auth, baseModel, time.Now()); inCooldown {
log.Debugf("antigravity executor: auth %s in short cooldown for model %s (%s remaining), returning 429 to switch auth", auth.ID, baseModel, remaining)
d := remaining
return resp, statusErr{code: http.StatusTooManyRequests, msg: fmt.Sprintf("auth in short cooldown, %s remaining", remaining), retryAfter: &d}
}
isClaude := strings.Contains(strings.ToLower(baseModel), "claude")
if isClaude || strings.Contains(baseModel, "gemini-3-pro") || strings.Contains(baseModel, "gemini-3.1-flash-image") { if isClaude || strings.Contains(baseModel, "gemini-3-pro") || strings.Contains(baseModel, "gemini-3.1-flash-image") {
return e.executeClaudeNonStream(ctx, auth, req, opts) return e.executeClaudeNonStream(ctx, auth, req, opts)
} }
@@ -511,7 +696,6 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
baseURLs := antigravityBaseURLFallbackOrder(auth) baseURLs := antigravityBaseURLFallbackOrder(auth)
httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0) httpClient := newAntigravityHTTPClient(ctx, e.cfg, auth, 0)
attempts := antigravityRetryAttempts(auth, e.cfg) attempts := antigravityRetryAttempts(auth, e.cfg)
attemptLoop: attemptLoop:
@@ -529,6 +713,7 @@ attemptLoop:
usedCreditsDirect = true usedCreditsDirect = true
} }
} }
httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, requestPayload, false, opts.Alt, baseURL) httpReq, errReq := e.buildRequest(ctx, auth, token, baseModel, requestPayload, false, opts.Alt, baseURL)
if errReq != nil { if errReq != nil {
err = errReq err = errReq
@@ -565,11 +750,29 @@ attemptLoop:
helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes)
if httpResp.StatusCode == http.StatusTooManyRequests { if httpResp.StatusCode == http.StatusTooManyRequests {
if usedCreditsDirect { decision := decideAntigravity429(bodyBytes)
if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) { switch decision.kind {
clearAntigravityPreferCredits(auth, baseModel) case antigravity429DecisionInstantRetrySameAuth:
markAntigravityCreditsExhausted(auth, time.Now()) if attempt+1 < attempts {
if decision.retryAfter != nil && *decision.retryAfter > 0 {
wait := antigravityInstantRetryDelay(*decision.retryAfter)
log.Debugf("antigravity executor: instant retry for model %s, waiting %s", baseModel, wait)
if errWait := antigravityWait(ctx, wait); errWait != nil {
return resp, errWait
} }
}
continue attemptLoop
}
case antigravity429DecisionShortCooldownSwitchAuth:
if decision.retryAfter != nil && *decision.retryAfter > 0 {
markAntigravityShortCooldown(auth, baseModel, time.Now(), *decision.retryAfter)
log.Debugf("antigravity executor: short quota cooldown (%s) for model %s, recorded cooldown and skipping credits fallback", *decision.retryAfter, baseModel)
}
case antigravity429DecisionFullQuotaExhausted:
if usedCreditsDirect {
clearAntigravityPreferCredits(auth, baseModel)
recordAntigravityCreditsFailure(auth, time.Now())
} else { } else {
creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, false, opts.Alt, baseURL, bodyBytes) creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, false, opts.Alt, baseURL, bodyBytes)
if creditsResp != nil { if creditsResp != nil {
@@ -593,6 +796,7 @@ attemptLoop:
} }
} }
} }
}
if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices { if httpResp.StatusCode < http.StatusOK || httpResp.StatusCode >= http.StatusMultipleChoices {
log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes)) log.Debugf("antigravity executor: upstream error status: %d, body: %s", httpResp.StatusCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), bodyBytes))
@@ -625,6 +829,16 @@ attemptLoop:
continue attemptLoop continue attemptLoop
} }
} }
if antigravityShouldRetrySoftRateLimit(httpResp.StatusCode, bodyBytes) {
if attempt+1 < attempts {
delay := antigravitySoftRateLimitDelay(attempt)
log.Debugf("antigravity executor: soft rate limit for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
if errWait := antigravityWait(ctx, delay); errWait != nil {
return resp, errWait
}
continue attemptLoop
}
}
err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes)
return resp, err return resp, err
} }
@@ -654,6 +868,11 @@ attemptLoop:
// executeClaudeNonStream performs a claude non-streaming request to the Antigravity API. // executeClaudeNonStream performs a claude non-streaming request to the Antigravity API.
func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) { func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
baseModel := thinking.ParseSuffix(req.Model).ModelName baseModel := thinking.ParseSuffix(req.Model).ModelName
if inCooldown, remaining := antigravityIsInShortCooldown(auth, baseModel, time.Now()); inCooldown {
log.Debugf("antigravity executor: auth %s in short cooldown for model %s (%s remaining), returning 429 to switch auth", auth.ID, baseModel, remaining)
d := remaining
return resp, statusErr{code: http.StatusTooManyRequests, msg: fmt.Sprintf("auth in short cooldown, %s remaining", remaining), retryAfter: &d}
}
token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
if errToken != nil { if errToken != nil {
@@ -755,11 +974,30 @@ attemptLoop:
} }
helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes)
if httpResp.StatusCode == http.StatusTooManyRequests { if httpResp.StatusCode == http.StatusTooManyRequests {
if usedCreditsDirect { decision := decideAntigravity429(bodyBytes)
if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) {
clearAntigravityPreferCredits(auth, baseModel) switch decision.kind {
markAntigravityCreditsExhausted(auth, time.Now()) case antigravity429DecisionInstantRetrySameAuth:
if attempt+1 < attempts {
if decision.retryAfter != nil && *decision.retryAfter > 0 {
wait := antigravityInstantRetryDelay(*decision.retryAfter)
log.Debugf("antigravity executor: instant retry for model %s, waiting %s", baseModel, wait)
if errWait := antigravityWait(ctx, wait); errWait != nil {
return resp, errWait
} }
}
continue attemptLoop
}
case antigravity429DecisionShortCooldownSwitchAuth:
if decision.retryAfter != nil && *decision.retryAfter > 0 {
markAntigravityShortCooldown(auth, baseModel, time.Now(), *decision.retryAfter)
log.Debugf("antigravity executor: short quota cooldown (%s) for model %s, recorded cooldown and skipping credits fallback", *decision.retryAfter, baseModel)
}
case antigravity429DecisionFullQuotaExhausted:
if usedCreditsDirect {
clearAntigravityPreferCredits(auth, baseModel)
recordAntigravityCreditsFailure(auth, time.Now())
} else { } else {
creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes)
if creditsResp != nil { if creditsResp != nil {
@@ -768,6 +1006,8 @@ attemptLoop:
} }
} }
} }
}
if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices {
goto streamSuccessClaudeNonStream goto streamSuccessClaudeNonStream
} }
@@ -800,6 +1040,16 @@ attemptLoop:
continue attemptLoop continue attemptLoop
} }
} }
if antigravityShouldRetrySoftRateLimit(httpResp.StatusCode, bodyBytes) {
if attempt+1 < attempts {
delay := antigravitySoftRateLimitDelay(attempt)
log.Debugf("antigravity executor: soft rate limit for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
if errWait := antigravityWait(ctx, delay); errWait != nil {
return resp, errWait
}
continue attemptLoop
}
}
err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes)
return resp, err return resp, err
} }
@@ -1079,6 +1329,11 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
baseModel := thinking.ParseSuffix(req.Model).ModelName baseModel := thinking.ParseSuffix(req.Model).ModelName
ctx = context.WithValue(ctx, "alt", "") ctx = context.WithValue(ctx, "alt", "")
if inCooldown, remaining := antigravityIsInShortCooldown(auth, baseModel, time.Now()); inCooldown {
log.Debugf("antigravity executor: auth %s in short cooldown for model %s (%s remaining), returning 429 to switch auth", auth.ID, baseModel, remaining)
d := remaining
return nil, statusErr{code: http.StatusTooManyRequests, msg: fmt.Sprintf("auth in short cooldown, %s remaining", remaining), retryAfter: &d}
}
token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth) token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
if errToken != nil { if errToken != nil {
@@ -1179,11 +1434,30 @@ attemptLoop:
} }
helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes) helps.AppendAPIResponseChunk(ctx, e.cfg, bodyBytes)
if httpResp.StatusCode == http.StatusTooManyRequests { if httpResp.StatusCode == http.StatusTooManyRequests {
if usedCreditsDirect { decision := decideAntigravity429(bodyBytes)
if shouldMarkAntigravityCreditsExhausted(httpResp.StatusCode, bodyBytes, nil) {
clearAntigravityPreferCredits(auth, baseModel) switch decision.kind {
markAntigravityCreditsExhausted(auth, time.Now()) case antigravity429DecisionInstantRetrySameAuth:
if attempt+1 < attempts {
if decision.retryAfter != nil && *decision.retryAfter > 0 {
wait := antigravityInstantRetryDelay(*decision.retryAfter)
log.Debugf("antigravity executor: instant retry for model %s, waiting %s", baseModel, wait)
if errWait := antigravityWait(ctx, wait); errWait != nil {
return nil, errWait
} }
}
continue attemptLoop
}
case antigravity429DecisionShortCooldownSwitchAuth:
if decision.retryAfter != nil && *decision.retryAfter > 0 {
markAntigravityShortCooldown(auth, baseModel, time.Now(), *decision.retryAfter)
log.Debugf("antigravity executor: short quota cooldown (%s) for model %s, recorded cooldown and skipping credits fallback", *decision.retryAfter, baseModel)
}
case antigravity429DecisionFullQuotaExhausted:
if usedCreditsDirect {
clearAntigravityPreferCredits(auth, baseModel)
recordAntigravityCreditsFailure(auth, time.Now())
} else { } else {
creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes) creditsResp, _ := e.attemptCreditsFallback(ctx, auth, httpClient, token, baseModel, translated, true, opts.Alt, baseURL, bodyBytes)
if creditsResp != nil { if creditsResp != nil {
@@ -1192,6 +1466,8 @@ attemptLoop:
} }
} }
} }
}
if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices { if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices {
goto streamSuccessExecuteStream goto streamSuccessExecuteStream
} }
@@ -1224,6 +1500,16 @@ attemptLoop:
continue attemptLoop continue attemptLoop
} }
} }
if antigravityShouldRetrySoftRateLimit(httpResp.StatusCode, bodyBytes) {
if attempt+1 < attempts {
delay := antigravitySoftRateLimitDelay(attempt)
log.Debugf("antigravity executor: soft rate limit for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
if errWait := antigravityWait(ctx, delay); errWait != nil {
return nil, errWait
}
continue attemptLoop
}
}
err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes) err = newAntigravityStatusErr(httpResp.StatusCode, bodyBytes)
return nil, err return nil, err
} }
@@ -1844,6 +2130,66 @@ func antigravityShouldRetryTransientResourceExhausted429(statusCode int, body []
return strings.Contains(msg, "resource has been exhausted") return strings.Contains(msg, "resource has been exhausted")
} }
// antigravityShouldRetrySoftRateLimit reports whether the response is a 429
// whose body decideAntigravity429 classifies as a soft retry. The body is
// only inspected when statusCode is http.StatusTooManyRequests.
func antigravityShouldRetrySoftRateLimit(statusCode int, body []byte) bool {
	return statusCode == http.StatusTooManyRequests &&
		decideAntigravity429(body).kind == antigravity429DecisionSoftRetry
}
// antigravitySoftRateLimitDelay returns the wait before retrying a soft rate
// limit: 500ms for the first attempt (attempt 0), growing by 500ms with each
// further attempt and capped at 3s. Negative attempts are treated as 0.
func antigravitySoftRateLimitDelay(attempt int) time.Duration {
	const (
		step    = 500 * time.Millisecond
		ceiling = 3 * time.Second
	)
	steps := attempt + 1
	if attempt < 0 {
		steps = 1
	}
	if delay := time.Duration(steps) * step; delay <= ceiling {
		return delay
	}
	return ceiling
}
// antigravityShortCooldownKey builds the antigravityShortCooldownByAuth key for
// an auth/model pair in the form "<authID>|<model>|sc", using the trimmed auth
// ID and model name. It returns "" when auth is nil or either part is blank.
func antigravityShortCooldownKey(auth *cliproxyauth.Auth, modelName string) string {
	if auth == nil {
		return ""
	}
	id, model := strings.TrimSpace(auth.ID), strings.TrimSpace(modelName)
	switch {
	case id == "", model == "":
		return ""
	default:
		return id + "|" + model + "|sc"
	}
}
// antigravityIsInShortCooldown reports whether the auth/model pair still has an
// active short cooldown at now and, if so, how much of it remains.
//
// Entries that are malformed, zero or already expired are deleted and reported
// as not in cooldown. A pair that yields no key (nil auth, blank ID or blank
// model) is never in cooldown.
func antigravityIsInShortCooldown(auth *cliproxyauth.Auth, modelName string, now time.Time) (bool, time.Duration) {
	key := antigravityShortCooldownKey(auth, modelName)
	if key == "" {
		return false, 0
	}
	stored, found := antigravityShortCooldownByAuth.Load(key)
	if !found {
		return false, 0
	}
	if deadline, isTime := stored.(time.Time); isTime && !deadline.IsZero() {
		if left := deadline.Sub(now); left > 0 {
			return true, left
		}
	}
	// Anything that is not a live deadline is stale: drop it.
	antigravityShortCooldownByAuth.Delete(key)
	return false, 0
}
// markAntigravityShortCooldown records that the auth/model pair is in a short
// cooldown ending at now plus duration, overwriting any previous entry. It is
// a no-op when the pair yields no key.
func markAntigravityShortCooldown(auth *cliproxyauth.Auth, modelName string, now time.Time, duration time.Duration) {
	if key := antigravityShortCooldownKey(auth, modelName); key != "" {
		antigravityShortCooldownByAuth.Store(key, now.Add(duration))
	}
}
func antigravityNoCapacityRetryDelay(attempt int) time.Duration { func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
if attempt < 0 { if attempt < 0 {
attempt = 0 attempt = 0
@@ -1866,6 +2212,13 @@ func antigravityTransient429RetryDelay(attempt int) time.Duration {
return delay return delay
} }
func antigravityInstantRetryDelay(wait time.Duration) time.Duration {
if wait <= 0 {
return 0
}
return wait + 800*time.Millisecond
}
func antigravityWait(ctx context.Context, wait time.Duration) error { func antigravityWait(ctx context.Context, wait time.Duration) error {
if wait <= 0 { if wait <= 0 {
return nil return nil
@@ -17,8 +17,9 @@ import (
) )
func resetAntigravityCreditsRetryState() { func resetAntigravityCreditsRetryState() {
antigravityCreditsExhaustedByAuth = sync.Map{} antigravityCreditsFailureByAuth = sync.Map{}
antigravityPreferCreditsByModel = sync.Map{} antigravityPreferCreditsByModel = sync.Map{}
antigravityShortCooldownByAuth = sync.Map{}
} }
func TestClassifyAntigravity429(t *testing.T) { func TestClassifyAntigravity429(t *testing.T) {
@@ -58,10 +59,10 @@ func TestClassifyAntigravity429(t *testing.T) {
} }
}) })
t.Run("unknown", func(t *testing.T) { t.Run("unstructured 429 defaults to soft rate limit", func(t *testing.T) {
body := []byte(`{"error":{"message":"too many requests"}}`) body := []byte(`{"error":{"message":"too many requests"}}`)
if got := classifyAntigravity429(body); got != antigravity429Unknown { if got := classifyAntigravity429(body); got != antigravity429SoftRateLimit {
t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429Unknown) t.Fatalf("classifyAntigravity429() = %q, want %q", got, antigravity429SoftRateLimit)
} }
}) })
} }
@@ -255,7 +256,7 @@ func TestAntigravityExecute_SkipsCreditsRetryWhenAlreadyExhausted(t *testing.T)
"expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339), "expired": time.Now().Add(1 * time.Hour).Format(time.RFC3339),
}, },
} }
markAntigravityCreditsExhausted(auth, time.Now()) recordAntigravityCreditsFailure(auth, time.Now())
_, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{ _, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
Model: "gemini-2.5-flash", Model: "gemini-2.5-flash",