fix(executor): add transient 429 resource exhausted handling with retry logic

This commit is contained in:
Luis Pater
2026-04-08 02:48:53 +08:00
parent c8b7e2b8d6
commit 91e7591955
2 changed files with 154 additions and 7 deletions
@@ -261,6 +261,28 @@ func classifyAntigravity429(body []byte) antigravity429Category {
return antigravity429Unknown
}
// antigravityHasQuotaResetDelayOrModelInfo reports whether the "error.details"
// array in body contains a google.rpc.ErrorInfo entry whose metadata carries a
// non-blank "quotaResetDelay" or "model" value.
//
// It returns false for an empty body, and when "error.details" is missing or
// is not a JSON array.
func antigravityHasQuotaResetDelayOrModelInfo(body []byte) bool {
	const errorInfoType = "type.googleapis.com/google.rpc.ErrorInfo"

	if len(body) == 0 {
		return false
	}
	entries := gjson.GetBytes(body, "error.details")
	if !(entries.Exists() && entries.IsArray()) {
		return false
	}
	for _, entry := range entries.Array() {
		// Only ErrorInfo entries are inspected; every other detail type is skipped.
		if entry.Get("@type").String() != errorInfoType {
			continue
		}
		// Either metadata field being non-blank is enough.
		for _, path := range [...]string{"metadata.quotaResetDelay", "metadata.model"} {
			if strings.TrimSpace(entry.Get(path).String()) != "" {
				return true
			}
		}
	}
	return false
}
// antigravityCreditsRetryEnabled reports whether cfg has the
// QuotaExceeded.AntigravityCredits flag set. A nil cfg yields false.
func antigravityCreditsRetryEnabled(cfg *config.Config) bool {
	if cfg == nil {
		return false
	}
	return cfg.QuotaExceeded.AntigravityCredits
}
@@ -362,6 +384,12 @@ func shouldMarkAntigravityCreditsExhausted(statusCode int, body []byte, reqErr e
lowerBody := strings.ToLower(string(body))
for _, keyword := range antigravityCreditsExhaustedKeywords {
if strings.Contains(lowerBody, keyword) {
if keyword == "resource has been exhausted" &&
statusCode == http.StatusTooManyRequests &&
classifyAntigravity429(body) == antigravity429Unknown &&
!antigravityHasQuotaResetDelayOrModelInfo(body) {
return false
}
return true
}
}
@@ -575,6 +603,14 @@ attemptLoop:
log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
continue
}
if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
delay := antigravityTransient429RetryDelay(attempt)
log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
if errWait := antigravityWait(ctx, delay); errWait != nil {
return resp, errWait
}
continue attemptLoop
}
if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
if idx+1 < len(baseURLs) {
log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
@@ -742,6 +778,14 @@ attemptLoop:
log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
continue
}
if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
delay := antigravityTransient429RetryDelay(attempt)
log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
if errWait := antigravityWait(ctx, delay); errWait != nil {
return resp, errWait
}
continue attemptLoop
}
if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
if idx+1 < len(baseURLs) {
log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
@@ -1158,6 +1202,14 @@ attemptLoop:
log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
continue
}
if antigravityShouldRetryTransientResourceExhausted429(httpResp.StatusCode, bodyBytes) && attempt+1 < attempts {
delay := antigravityTransient429RetryDelay(attempt)
log.Debugf("antigravity executor: transient 429 resource exhausted for model %s, retrying in %s (attempt %d/%d)", baseModel, delay, attempt+1, attempts)
if errWait := antigravityWait(ctx, delay); errWait != nil {
return nil, errWait
}
continue attemptLoop
}
if antigravityShouldRetryNoCapacity(httpResp.StatusCode, bodyBytes) {
if idx+1 < len(baseURLs) {
log.Debugf("antigravity executor: no capacity on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
@@ -1774,6 +1826,24 @@ func antigravityShouldRetryNoCapacity(statusCode int, body []byte) bool {
return strings.Contains(msg, "no capacity available")
}
// antigravityShouldRetryTransientResourceExhausted429 reports whether a
// response matches the "resource has been exhausted" 429 shape. All of the
// following must hold:
//
//   - statusCode is 429 (Too Many Requests) and body is non-empty;
//   - classifyAntigravity429 categorises body as antigravity429Unknown;
//   - the JSON field "error.status" equals RESOURCE_EXHAUSTED, ignoring case
//     and surrounding whitespace;
//   - the raw body contains "resource has been exhausted", ignoring case.
func antigravityShouldRetryTransientResourceExhausted429(statusCode int, body []byte) bool {
	if statusCode != http.StatusTooManyRequests || len(body) == 0 {
		return false
	}
	// Any 429 that the classifier recognises is left to the other handlers.
	if classifyAntigravity429(body) != antigravity429Unknown {
		return false
	}
	rpcStatus := strings.TrimSpace(gjson.GetBytes(body, "error.status").String())
	if !strings.EqualFold(rpcStatus, "RESOURCE_EXHAUSTED") {
		return false
	}
	return strings.Contains(strings.ToLower(string(body)), "resource has been exhausted")
}
func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
if attempt < 0 {
attempt = 0
@@ -1785,6 +1855,17 @@ func antigravityNoCapacityRetryDelay(attempt int) time.Duration {
return delay
}
// antigravityTransient429RetryDelay returns the pause to apply before the next
// retry of a transient 429 response.
//
// attempt is the zero-based index of the attempt that just failed; negative
// values are treated as 0. The delay grows linearly in 100ms steps (100ms for
// attempt 0, 200ms for attempt 1, ...) and is capped at 500ms.
func antigravityTransient429RetryDelay(attempt int) time.Duration {
	const (
		step     = 100 * time.Millisecond
		maxDelay = 500 * time.Millisecond
		// maxSteps is the number of steps at which the cap is reached.
		maxSteps = int(maxDelay / step)
	)
	if attempt < 0 {
		attempt = 0
	}
	// Clamp before multiplying: for very large attempt values, attempt+1 or the
	// Duration product would overflow and produce a zero or negative delay
	// instead of the intended cap.
	if attempt >= maxSteps-1 {
		return maxDelay
	}
	return time.Duration(attempt+1) * step
}
func antigravityWait(ctx context.Context, wait time.Duration) error {
if wait <= 0 {
return nil