fix(auth): skip downtime for request-scoped 404 errors in model state management
This commit is contained in:
@@ -1734,77 +1734,79 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
|
||||
}
|
||||
} else {
|
||||
if result.Model != "" {
|
||||
state := ensureModelState(auth, result.Model)
|
||||
state.Unavailable = true
|
||||
state.Status = StatusError
|
||||
state.UpdatedAt = now
|
||||
if result.Error != nil {
|
||||
state.LastError = cloneError(result.Error)
|
||||
state.StatusMessage = result.Error.Message
|
||||
auth.LastError = cloneError(result.Error)
|
||||
auth.StatusMessage = result.Error.Message
|
||||
}
|
||||
if !isRequestScopedNotFoundResultError(result.Error) {
|
||||
state := ensureModelState(auth, result.Model)
|
||||
state.Unavailable = true
|
||||
state.Status = StatusError
|
||||
state.UpdatedAt = now
|
||||
if result.Error != nil {
|
||||
state.LastError = cloneError(result.Error)
|
||||
state.StatusMessage = result.Error.Message
|
||||
auth.LastError = cloneError(result.Error)
|
||||
auth.StatusMessage = result.Error.Message
|
||||
}
|
||||
|
||||
statusCode := statusCodeFromResult(result.Error)
|
||||
if isModelSupportResultError(result.Error) {
|
||||
next := now.Add(12 * time.Hour)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "model_not_supported"
|
||||
shouldSuspendModel = true
|
||||
} else {
|
||||
switch statusCode {
|
||||
case 401:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "unauthorized"
|
||||
shouldSuspendModel = true
|
||||
case 402, 403:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "payment_required"
|
||||
shouldSuspendModel = true
|
||||
case 404:
|
||||
statusCode := statusCodeFromResult(result.Error)
|
||||
if isModelSupportResultError(result.Error) {
|
||||
next := now.Add(12 * time.Hour)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "not_found"
|
||||
suspendReason = "model_not_supported"
|
||||
shouldSuspendModel = true
|
||||
case 429:
|
||||
var next time.Time
|
||||
backoffLevel := state.Quota.BackoffLevel
|
||||
if result.RetryAfter != nil {
|
||||
next = now.Add(*result.RetryAfter)
|
||||
} else {
|
||||
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
|
||||
if cooldown > 0 {
|
||||
next = now.Add(cooldown)
|
||||
}
|
||||
backoffLevel = nextLevel
|
||||
}
|
||||
state.NextRetryAfter = next
|
||||
state.Quota = QuotaState{
|
||||
Exceeded: true,
|
||||
Reason: "quota",
|
||||
NextRecoverAt: next,
|
||||
BackoffLevel: backoffLevel,
|
||||
}
|
||||
suspendReason = "quota"
|
||||
shouldSuspendModel = true
|
||||
setModelQuota = true
|
||||
case 408, 500, 502, 503, 504:
|
||||
if quotaCooldownDisabledForAuth(auth) {
|
||||
state.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
next := now.Add(1 * time.Minute)
|
||||
} else {
|
||||
switch statusCode {
|
||||
case 401:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "unauthorized"
|
||||
shouldSuspendModel = true
|
||||
case 402, 403:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "payment_required"
|
||||
shouldSuspendModel = true
|
||||
case 404:
|
||||
next := now.Add(12 * time.Hour)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "not_found"
|
||||
shouldSuspendModel = true
|
||||
case 429:
|
||||
var next time.Time
|
||||
backoffLevel := state.Quota.BackoffLevel
|
||||
if result.RetryAfter != nil {
|
||||
next = now.Add(*result.RetryAfter)
|
||||
} else {
|
||||
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
|
||||
if cooldown > 0 {
|
||||
next = now.Add(cooldown)
|
||||
}
|
||||
backoffLevel = nextLevel
|
||||
}
|
||||
state.NextRetryAfter = next
|
||||
state.Quota = QuotaState{
|
||||
Exceeded: true,
|
||||
Reason: "quota",
|
||||
NextRecoverAt: next,
|
||||
BackoffLevel: backoffLevel,
|
||||
}
|
||||
suspendReason = "quota"
|
||||
shouldSuspendModel = true
|
||||
setModelQuota = true
|
||||
case 408, 500, 502, 503, 504:
|
||||
if quotaCooldownDisabledForAuth(auth) {
|
||||
state.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
next := now.Add(1 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
}
|
||||
default:
|
||||
state.NextRetryAfter = time.Time{}
|
||||
}
|
||||
default:
|
||||
state.NextRetryAfter = time.Time{}
|
||||
}
|
||||
}
|
||||
|
||||
auth.Status = StatusError
|
||||
auth.UpdatedAt = now
|
||||
updateAggregatedAvailability(auth, now)
|
||||
auth.Status = StatusError
|
||||
auth.UpdatedAt = now
|
||||
updateAggregatedAvailability(auth, now)
|
||||
}
|
||||
} else {
|
||||
applyAuthFailureState(auth, result.Error, result.RetryAfter, now)
|
||||
}
|
||||
@@ -2056,11 +2058,29 @@ func isModelSupportResultError(err *Error) bool {
|
||||
return isModelSupportErrorMessage(err.Message)
|
||||
}
|
||||
|
||||
func isRequestScopedNotFoundMessage(message string) bool {
|
||||
if message == "" {
|
||||
return false
|
||||
}
|
||||
lower := strings.ToLower(message)
|
||||
return strings.Contains(lower, "item with id") &&
|
||||
strings.Contains(lower, "not found") &&
|
||||
strings.Contains(lower, "items are not persisted when `store` is set to false")
|
||||
}
|
||||
|
||||
func isRequestScopedNotFoundResultError(err *Error) bool {
|
||||
if err == nil || statusCodeFromResult(err) != http.StatusNotFound {
|
||||
return false
|
||||
}
|
||||
return isRequestScopedNotFoundMessage(err.Message)
|
||||
}
|
||||
|
||||
// isRequestInvalidError returns true if the error represents a client request
|
||||
// error that should not be retried. Specifically, it treats 400 responses with
|
||||
// "invalid_request_error" and all 422 responses as request-shape failures,
|
||||
// where switching auths or pooled upstream models will not help. Model-support
|
||||
// errors are excluded so routing can fall through to another auth or upstream.
|
||||
// "invalid_request_error", request-scoped 404 item misses caused by `store=false`,
|
||||
// and all 422 responses as request-shape failures, where switching auths or
|
||||
// pooled upstream models will not help. Model-support errors are excluded so
|
||||
// routing can fall through to another auth or upstream.
|
||||
func isRequestInvalidError(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
@@ -2072,6 +2092,8 @@ func isRequestInvalidError(err error) bool {
|
||||
switch status {
|
||||
case http.StatusBadRequest:
|
||||
return strings.Contains(err.Error(), "invalid_request_error")
|
||||
case http.StatusNotFound:
|
||||
return isRequestScopedNotFoundMessage(err.Error())
|
||||
case http.StatusUnprocessableEntity:
|
||||
return true
|
||||
default:
|
||||
@@ -2083,6 +2105,9 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
|
||||
if auth == nil {
|
||||
return
|
||||
}
|
||||
if isRequestScopedNotFoundResultError(resultErr) {
|
||||
return
|
||||
}
|
||||
auth.Unavailable = true
|
||||
auth.Status = StatusError
|
||||
auth.UpdatedAt = now
|
||||
|
||||
@@ -12,6 +12,8 @@ import (
|
||||
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
|
||||
)
|
||||
|
||||
const requestScopedNotFoundMessage = "Item with id 'rs_0b5f3eb6f51f175c0169ca74e4a85881998539920821603a74' not found. Items are not persisted when `store` is set to false. Try again with `store` set to true, or remove this item from your input."
|
||||
|
||||
func TestManager_ShouldRetryAfterError_RespectsAuthRequestRetryOverride(t *testing.T) {
|
||||
m := NewManager(nil, nil, nil)
|
||||
m.SetRetryConfig(3, 30*time.Second, 0)
|
||||
@@ -447,3 +449,114 @@ func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
|
||||
t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", state.NextRetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestManager_MarkResult_RequestScopedNotFoundDoesNotCooldownAuth(t *testing.T) {
|
||||
m := NewManager(nil, nil, nil)
|
||||
|
||||
auth := &Auth{
|
||||
ID: "auth-1",
|
||||
Provider: "openai",
|
||||
}
|
||||
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
|
||||
t.Fatalf("register auth: %v", errRegister)
|
||||
}
|
||||
|
||||
model := "gpt-4.1"
|
||||
m.MarkResult(context.Background(), Result{
|
||||
AuthID: auth.ID,
|
||||
Provider: auth.Provider,
|
||||
Model: model,
|
||||
Success: false,
|
||||
Error: &Error{
|
||||
HTTPStatus: http.StatusNotFound,
|
||||
Message: requestScopedNotFoundMessage,
|
||||
},
|
||||
})
|
||||
|
||||
updated, ok := m.GetByID(auth.ID)
|
||||
if !ok || updated == nil {
|
||||
t.Fatalf("expected auth to be present")
|
||||
}
|
||||
if updated.Unavailable {
|
||||
t.Fatalf("expected request-scoped 404 to keep auth available")
|
||||
}
|
||||
if !updated.NextRetryAfter.IsZero() {
|
||||
t.Fatalf("expected request-scoped 404 to keep auth cooldown unset, got %v", updated.NextRetryAfter)
|
||||
}
|
||||
if state := updated.ModelStates[model]; state != nil {
|
||||
t.Fatalf("expected request-scoped 404 to avoid model cooldown state, got %#v", state)
|
||||
}
|
||||
}
|
||||
|
||||
func TestManager_RequestScopedNotFoundStopsRetryWithoutSuspendingAuth(t *testing.T) {
|
||||
m := NewManager(nil, nil, nil)
|
||||
executor := &authFallbackExecutor{
|
||||
id: "openai",
|
||||
executeErrors: map[string]error{
|
||||
"aa-bad-auth": &Error{
|
||||
HTTPStatus: http.StatusNotFound,
|
||||
Message: requestScopedNotFoundMessage,
|
||||
},
|
||||
},
|
||||
}
|
||||
m.RegisterExecutor(executor)
|
||||
|
||||
model := "gpt-4.1"
|
||||
badAuth := &Auth{ID: "aa-bad-auth", Provider: "openai"}
|
||||
goodAuth := &Auth{ID: "bb-good-auth", Provider: "openai"}
|
||||
|
||||
reg := registry.GetGlobalRegistry()
|
||||
reg.RegisterClient(badAuth.ID, "openai", []*registry.ModelInfo{{ID: model}})
|
||||
reg.RegisterClient(goodAuth.ID, "openai", []*registry.ModelInfo{{ID: model}})
|
||||
t.Cleanup(func() {
|
||||
reg.UnregisterClient(badAuth.ID)
|
||||
reg.UnregisterClient(goodAuth.ID)
|
||||
})
|
||||
|
||||
if _, errRegister := m.Register(context.Background(), badAuth); errRegister != nil {
|
||||
t.Fatalf("register bad auth: %v", errRegister)
|
||||
}
|
||||
if _, errRegister := m.Register(context.Background(), goodAuth); errRegister != nil {
|
||||
t.Fatalf("register good auth: %v", errRegister)
|
||||
}
|
||||
|
||||
_, errExecute := m.Execute(context.Background(), []string{"openai"}, cliproxyexecutor.Request{Model: model}, cliproxyexecutor.Options{})
|
||||
if errExecute == nil {
|
||||
t.Fatal("expected request-scoped not-found error")
|
||||
}
|
||||
errResult, ok := errExecute.(*Error)
|
||||
if !ok {
|
||||
t.Fatalf("expected *Error, got %T", errExecute)
|
||||
}
|
||||
if errResult.HTTPStatus != http.StatusNotFound {
|
||||
t.Fatalf("status = %d, want %d", errResult.HTTPStatus, http.StatusNotFound)
|
||||
}
|
||||
if errResult.Message != requestScopedNotFoundMessage {
|
||||
t.Fatalf("message = %q, want %q", errResult.Message, requestScopedNotFoundMessage)
|
||||
}
|
||||
|
||||
got := executor.ExecuteCalls()
|
||||
want := []string{badAuth.ID}
|
||||
if len(got) != len(want) {
|
||||
t.Fatalf("execute calls = %v, want %v", got, want)
|
||||
}
|
||||
for i := range want {
|
||||
if got[i] != want[i] {
|
||||
t.Fatalf("execute call %d auth = %q, want %q", i, got[i], want[i])
|
||||
}
|
||||
}
|
||||
|
||||
updatedBad, ok := m.GetByID(badAuth.ID)
|
||||
if !ok || updatedBad == nil {
|
||||
t.Fatalf("expected bad auth to remain registered")
|
||||
}
|
||||
if updatedBad.Unavailable {
|
||||
t.Fatalf("expected request-scoped 404 to keep bad auth available")
|
||||
}
|
||||
if !updatedBad.NextRetryAfter.IsZero() {
|
||||
t.Fatalf("expected request-scoped 404 to keep bad auth cooldown unset, got %v", updatedBad.NextRetryAfter)
|
||||
}
|
||||
if state := updatedBad.ModelStates[model]; state != nil {
|
||||
t.Fatalf("expected request-scoped 404 to avoid bad auth model cooldown state, got %#v", state)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user