fix: fall back on model support errors during auth rotation
This commit is contained in:
+101
-43
@@ -1627,53 +1627,60 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
|
||||
}
|
||||
|
||||
statusCode := statusCodeFromResult(result.Error)
|
||||
switch statusCode {
|
||||
case 401:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "unauthorized"
|
||||
shouldSuspendModel = true
|
||||
case 402, 403:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "payment_required"
|
||||
shouldSuspendModel = true
|
||||
case 404:
|
||||
if isModelSupportResultError(result.Error) {
|
||||
next := now.Add(12 * time.Hour)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "not_found"
|
||||
suspendReason = "model_not_supported"
|
||||
shouldSuspendModel = true
|
||||
case 429:
|
||||
var next time.Time
|
||||
backoffLevel := state.Quota.BackoffLevel
|
||||
if result.RetryAfter != nil {
|
||||
next = now.Add(*result.RetryAfter)
|
||||
} else {
|
||||
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
|
||||
if cooldown > 0 {
|
||||
next = now.Add(cooldown)
|
||||
}
|
||||
backoffLevel = nextLevel
|
||||
}
|
||||
state.NextRetryAfter = next
|
||||
state.Quota = QuotaState{
|
||||
Exceeded: true,
|
||||
Reason: "quota",
|
||||
NextRecoverAt: next,
|
||||
BackoffLevel: backoffLevel,
|
||||
}
|
||||
suspendReason = "quota"
|
||||
shouldSuspendModel = true
|
||||
setModelQuota = true
|
||||
case 408, 500, 502, 503, 504:
|
||||
if quotaCooldownDisabledForAuth(auth) {
|
||||
state.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
next := now.Add(1 * time.Minute)
|
||||
} else {
|
||||
switch statusCode {
|
||||
case 401:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "unauthorized"
|
||||
shouldSuspendModel = true
|
||||
case 402, 403:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "payment_required"
|
||||
shouldSuspendModel = true
|
||||
case 404:
|
||||
next := now.Add(12 * time.Hour)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "not_found"
|
||||
shouldSuspendModel = true
|
||||
case 429:
|
||||
var next time.Time
|
||||
backoffLevel := state.Quota.BackoffLevel
|
||||
if result.RetryAfter != nil {
|
||||
next = now.Add(*result.RetryAfter)
|
||||
} else {
|
||||
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
|
||||
if cooldown > 0 {
|
||||
next = now.Add(cooldown)
|
||||
}
|
||||
backoffLevel = nextLevel
|
||||
}
|
||||
state.NextRetryAfter = next
|
||||
state.Quota = QuotaState{
|
||||
Exceeded: true,
|
||||
Reason: "quota",
|
||||
NextRecoverAt: next,
|
||||
BackoffLevel: backoffLevel,
|
||||
}
|
||||
suspendReason = "quota"
|
||||
shouldSuspendModel = true
|
||||
setModelQuota = true
|
||||
case 408, 500, 502, 503, 504:
|
||||
if quotaCooldownDisabledForAuth(auth) {
|
||||
state.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
next := now.Add(1 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
}
|
||||
default:
|
||||
state.NextRetryAfter = time.Time{}
|
||||
}
|
||||
default:
|
||||
state.NextRetryAfter = time.Time{}
|
||||
}
|
||||
|
||||
auth.Status = StatusError
|
||||
@@ -1883,14 +1890,65 @@ func statusCodeFromResult(err *Error) int {
|
||||
return err.StatusCode()
|
||||
}
|
||||
|
||||
// isModelSupportErrorMessage reports whether message contains one of the known
// phrases signalling that the requested model is unsupported or unavailable.
// The comparison is case-insensitive and ignores leading/trailing whitespace;
// an empty or whitespace-only message never matches.
func isModelSupportErrorMessage(message string) bool {
	text := strings.TrimSpace(message)
	if len(text) == 0 {
		return false
	}
	text = strings.ToLower(text)

	// Any single phrase is sufficient; the phrases are matched as substrings.
	switch {
	case strings.Contains(text, "model_not_supported"),
		strings.Contains(text, "requested model is not supported"),
		strings.Contains(text, "requested model is unsupported"),
		strings.Contains(text, "requested model is unavailable"),
		strings.Contains(text, "model is not supported"),
		strings.Contains(text, "model not supported"),
		strings.Contains(text, "unsupported model"),
		strings.Contains(text, "model unavailable"),
		strings.Contains(text, "not available for your plan"),
		strings.Contains(text, "not available for your account"):
		return true
	}
	return false
}
|
||||
|
||||
func isModelSupportError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
status := statusCodeFromError(err)
|
||||
if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity {
|
||||
return false
|
||||
}
|
||||
return isModelSupportErrorMessage(err.Error())
|
||||
}
|
||||
|
||||
func isModelSupportResultError(err *Error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
status := statusCodeFromResult(err)
|
||||
if status != http.StatusBadRequest && status != http.StatusUnprocessableEntity {
|
||||
return false
|
||||
}
|
||||
return isModelSupportErrorMessage(err.Message)
|
||||
}
|
||||
|
||||
// isRequestInvalidError returns true if the error represents a client request
|
||||
// error that should not be retried. Specifically, it treats 400 responses with
|
||||
// "invalid_request_error" and all 422 responses as request-shape failures,
|
||||
// where switching auths or pooled upstream models will not help. Model-support
|
||||
// errors are excluded so routing can fall through to another auth or upstream.
|
||||
func isRequestInvalidError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
if isModelSupportError(err) {
|
||||
return false
|
||||
}
|
||||
status := statusCodeFromError(err)
|
||||
switch status {
|
||||
case http.StatusBadRequest:
|
||||
|
||||
Reference in New Issue
Block a user