fix(executor): handle 429 Retry-After header and default retry logic for quota exhaustion
- Added proper parsing of `Retry-After` headers for 429 responses. - Set default retry duration when "disable cooling" is active on quota exhaustion. - Updated tests to verify `Retry-After` handling and default behavior.
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -153,6 +154,40 @@ func wrapQwenError(ctx context.Context, httpCode int, body []byte) (errCode int,
|
||||
return errCode, retryAfter
|
||||
}
|
||||
|
||||
func qwenDisableCooling(cfg *config.Config, auth *cliproxyauth.Auth) bool {
|
||||
if auth != nil {
|
||||
if override, ok := auth.DisableCoolingOverride(); ok {
|
||||
return override
|
||||
}
|
||||
}
|
||||
if cfg == nil {
|
||||
return false
|
||||
}
|
||||
return cfg.DisableCooling
|
||||
}
|
||||
|
||||
// parseRetryAfterHeader extracts a positive wait duration from the
// "Retry-After" response header.
//
// Both forms of the header value are accepted:
//   - delay-seconds: a base-10 integer number of seconds;
//   - HTTP-date: any format understood by http.ParseTime, interpreted
//     relative to now.
//
// It returns nil when the header is absent or blank, when the value cannot be
// parsed in either form, or when the resulting delay is not strictly positive
// (zero/negative seconds, or a date that is not after now).
//
// Very large delay-seconds values are saturated at the maximum time.Duration
// instead of being multiplied blindly, which would overflow int64 and could
// yield a negative or otherwise meaningless duration.
func parseRetryAfterHeader(header http.Header, now time.Time) *time.Duration {
	raw := strings.TrimSpace(header.Get("Retry-After"))
	if raw == "" {
		return nil
	}

	// delay-seconds form. ParseInt with an explicit 64-bit size keeps the
	// accepted range identical on 32-bit and 64-bit platforms.
	if seconds, err := strconv.ParseInt(raw, 10, 64); err == nil {
		if seconds <= 0 {
			return nil
		}
		const maxDuration = time.Duration(1<<63 - 1)
		// Start saturated; only multiply when the product is known to fit.
		d := maxDuration
		if seconds <= int64(maxDuration/time.Second) {
			d = time.Duration(seconds) * time.Second
		}
		return &d
	}

	// HTTP-date form. time.Time.Sub already saturates on overflow.
	if at, err := http.ParseTime(raw); err == nil {
		if !at.After(now) {
			return nil
		}
		d := at.Sub(now)
		return &d
	}

	return nil
}
|
||||
|
||||
// ensureQwenSystemMessage ensures the request has a single system message at the beginning.
|
||||
// It always injects the default system prompt and merges any user-provided system messages
|
||||
// into the injected system message content to satisfy Qwen's strict message ordering rules.
|
||||
@@ -384,6 +419,13 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
|
||||
}
|
||||
|
||||
errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
|
||||
if errCode == http.StatusTooManyRequests && retryAfter == nil {
|
||||
retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now())
|
||||
}
|
||||
if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) {
|
||||
defaultRetryAfter := time.Second
|
||||
retryAfter = &defaultRetryAfter
|
||||
}
|
||||
helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
|
||||
err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
|
||||
@@ -511,6 +553,13 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
|
||||
}
|
||||
|
||||
errCode, retryAfter := wrapQwenError(ctx, httpResp.StatusCode, b)
|
||||
if errCode == http.StatusTooManyRequests && retryAfter == nil {
|
||||
retryAfter = parseRetryAfterHeader(httpResp.Header, time.Now())
|
||||
}
|
||||
if errCode == http.StatusTooManyRequests && retryAfter == nil && qwenDisableCooling(e.cfg, auth) && isQwenQuotaError(b) {
|
||||
defaultRetryAfter := time.Second
|
||||
retryAfter = &defaultRetryAfter
|
||||
}
|
||||
helps.LogWithRequestID(ctx).Debugf("request error, error status: %d (mapped: %d), error message: %s", httpResp.StatusCode, errCode, helps.SummarizeErrorBody(httpResp.Header.Get("Content-Type"), b))
|
||||
|
||||
err = statusErr{code: errCode, msg: string(b), retryAfter: retryAfter}
|
||||
|
||||
Reference in New Issue
Block a user