fix(executor): handle 429 Retry-After header and default retry logic for quota exhaustion

- Added proper parsing of `Retry-After` headers for 429 responses.
- Set default retry duration when "disable cooling" is active on quota exhaustion.
- Updated tests to verify `Retry-After` handling and default behavior.
This commit is contained in:
Luis Pater
2026-04-11 21:04:55 +08:00
parent 9ded75d335
commit 0ab1f5412f
2 changed files with 283 additions and 0 deletions
@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"net/http"
"strconv"
"strings"
"sync"
"time"
@@ -153,6 +154,40 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
return errCode, retryAfter
}
// qwenDisableCooling resolves the effective disable-cooling flag.
//
// When auth is non-nil and reports an override via DisableCoolingOverride,
// that override is returned. Otherwise the value comes from cfg.DisableCooling,
// and a nil cfg yields false.
func qwenDisableCooling(cfg *config.Config, auth *cliproxyauth.Auth) bool {
	if auth != nil {
		// The per-auth override takes precedence over the config value.
		if disabled, hasOverride := auth.DisableCoolingOverride(); hasOverride {
			return disabled
		}
	}
	return cfg != nil && cfg.DisableCooling
}
// parseRetryAfterHeader extracts a wait duration from the Retry-After header.
//
// Two forms are accepted: a whole number of seconds, or an HTTP date that is
// interpreted relative to now. The result is nil when the header is missing,
// cannot be parsed, or does not describe a strictly positive delay (zero or
// negative seconds, or a date that is not after now).
//
// A seconds value too large to be expressed as a time.Duration is clamped to
// the maximum duration instead of silently overflowing into a negative or
// otherwise meaningless value. Values that do not fit in an int64 at all are
// treated as unparsable.
func parseRetryAfterHeader(header http.Header, now time.Time) *time.Duration {
	const (
		maxDuration = time.Duration(1<<63 - 1)
		maxSeconds  = int64(maxDuration / time.Second)
	)

	raw := strings.TrimSpace(header.Get("Retry-After"))
	if raw == "" {
		return nil
	}

	// Parse as 64-bit so the accepted range does not depend on the platform's
	// int width.
	if seconds, err := strconv.ParseInt(raw, 10, 64); err == nil {
		if seconds <= 0 {
			return nil
		}
		d := maxDuration
		if seconds <= maxSeconds {
			// Safe: the product is guaranteed to fit in an int64.
			d = time.Duration(seconds) * time.Second
		}
		return &d
	}

	// Not an integer: fall back to the HTTP-date form.
	if at, err := http.ParseTime(raw); err == nil {
		if !at.After(now) {
			return nil
		}
		d := at.Sub(now)
		return &d
	}
	return nil
}
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
// It always injects the default system prompt and merges any user-provided system messages
// into the injected system message content to satisfy Qwen's strict message ordering rules.
@@ -384,6 +419,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
}
errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
if errCode == http.StatusTooManyRequests && retryAfter == nil {
retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now())
}
if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) {
defaultRetryAfter := time.Second
retryAfter = &defaultRetryAfter
}
helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
@@ -511,6 +553,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
}
errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
if errCode == http.StatusTooManyRequests && retryAfter == nil {
retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now())
}
if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) {
defaultRetryAfter := time.Second
retryAfter = &defaultRetryAfter
}
helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}