Merge pull request #2576 from zilianpn/fix/disable-cooling-auth-errors
fix(auth): honor disable-cooling and enrich no-auth errors
This commit is contained in:
@@ -1916,6 +1916,7 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
|
||||
} else {
|
||||
if result.Model != "" {
|
||||
if !isRequestScopedNotFoundResultError(result.Error) {
|
||||
disableCooling := quotaCooldownDisabledForAuth(auth)
|
||||
state := ensureModelState(auth, result.Model)
|
||||
state.Unavailable = true
|
||||
state.Status = StatusError
|
||||
@@ -1936,31 +1937,45 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
|
||||
} else {
|
||||
switch statusCode {
|
||||
case 401:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "unauthorized"
|
||||
shouldSuspendModel = true
|
||||
if disableCooling {
|
||||
state.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "unauthorized"
|
||||
shouldSuspendModel = true
|
||||
}
|
||||
case 402, 403:
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "payment_required"
|
||||
shouldSuspendModel = true
|
||||
if disableCooling {
|
||||
state.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
next := now.Add(30 * time.Minute)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "payment_required"
|
||||
shouldSuspendModel = true
|
||||
}
|
||||
case 404:
|
||||
next := now.Add(12 * time.Hour)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "not_found"
|
||||
shouldSuspendModel = true
|
||||
if disableCooling {
|
||||
state.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
next := now.Add(12 * time.Hour)
|
||||
state.NextRetryAfter = next
|
||||
suspendReason = "not_found"
|
||||
shouldSuspendModel = true
|
||||
}
|
||||
case 429:
|
||||
var next time.Time
|
||||
backoffLevel := state.Quota.BackoffLevel
|
||||
if result.RetryAfter != nil {
|
||||
next = now.Add(*result.RetryAfter)
|
||||
} else {
|
||||
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, quotaCooldownDisabledForAuth(auth))
|
||||
if cooldown > 0 {
|
||||
next = now.Add(cooldown)
|
||||
if !disableCooling {
|
||||
if result.RetryAfter != nil {
|
||||
next = now.Add(*result.RetryAfter)
|
||||
} else {
|
||||
cooldown, nextLevel := nextQuotaCooldown(backoffLevel, disableCooling)
|
||||
if cooldown > 0 {
|
||||
next = now.Add(cooldown)
|
||||
}
|
||||
backoffLevel = nextLevel
|
||||
}
|
||||
backoffLevel = nextLevel
|
||||
}
|
||||
state.NextRetryAfter = next
|
||||
state.Quota = QuotaState{
|
||||
@@ -1969,11 +1984,13 @@ func (m *Manager) MarkResult(ctx context.Context, result Result) {
|
||||
NextRecoverAt: next,
|
||||
BackoffLevel: backoffLevel,
|
||||
}
|
||||
suspendReason = "quota"
|
||||
shouldSuspendModel = true
|
||||
setModelQuota = true
|
||||
if !disableCooling {
|
||||
suspendReason = "quota"
|
||||
shouldSuspendModel = true
|
||||
setModelQuota = true
|
||||
}
|
||||
case 408, 500, 502, 503, 504:
|
||||
if quotaCooldownDisabledForAuth(auth) {
|
||||
if disableCooling {
|
||||
state.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
next := now.Add(1 * time.Minute)
|
||||
@@ -2324,6 +2341,7 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
|
||||
if isRequestScopedNotFoundResultError(resultErr) {
|
||||
return
|
||||
}
|
||||
disableCooling := quotaCooldownDisabledForAuth(auth)
|
||||
auth.Unavailable = true
|
||||
auth.Status = StatusError
|
||||
auth.UpdatedAt = now
|
||||
@@ -2337,32 +2355,46 @@ func applyAuthFailureState(auth *Auth, resultErr *Error, retryAfter *time.Durati
|
||||
switch statusCode {
|
||||
case 401:
|
||||
auth.StatusMessage = "unauthorized"
|
||||
auth.NextRetryAfter = now.Add(30 * time.Minute)
|
||||
if disableCooling {
|
||||
auth.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
auth.NextRetryAfter = now.Add(30 * time.Minute)
|
||||
}
|
||||
case 402, 403:
|
||||
auth.StatusMessage = "payment_required"
|
||||
auth.NextRetryAfter = now.Add(30 * time.Minute)
|
||||
if disableCooling {
|
||||
auth.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
auth.NextRetryAfter = now.Add(30 * time.Minute)
|
||||
}
|
||||
case 404:
|
||||
auth.StatusMessage = "not_found"
|
||||
auth.NextRetryAfter = now.Add(12 * time.Hour)
|
||||
if disableCooling {
|
||||
auth.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
auth.NextRetryAfter = now.Add(12 * time.Hour)
|
||||
}
|
||||
case 429:
|
||||
auth.StatusMessage = "quota exhausted"
|
||||
auth.Quota.Exceeded = true
|
||||
auth.Quota.Reason = "quota"
|
||||
var next time.Time
|
||||
if retryAfter != nil {
|
||||
next = now.Add(*retryAfter)
|
||||
} else {
|
||||
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel, quotaCooldownDisabledForAuth(auth))
|
||||
if cooldown > 0 {
|
||||
next = now.Add(cooldown)
|
||||
if !disableCooling {
|
||||
if retryAfter != nil {
|
||||
next = now.Add(*retryAfter)
|
||||
} else {
|
||||
cooldown, nextLevel := nextQuotaCooldown(auth.Quota.BackoffLevel, disableCooling)
|
||||
if cooldown > 0 {
|
||||
next = now.Add(cooldown)
|
||||
}
|
||||
auth.Quota.BackoffLevel = nextLevel
|
||||
}
|
||||
auth.Quota.BackoffLevel = nextLevel
|
||||
}
|
||||
auth.Quota.NextRecoverAt = next
|
||||
auth.NextRetryAfter = next
|
||||
case 408, 500, 502, 503, 504:
|
||||
auth.StatusMessage = "transient upstream error"
|
||||
if quotaCooldownDisabledForAuth(auth) {
|
||||
if disableCooling {
|
||||
auth.NextRetryAfter = time.Time{}
|
||||
} else {
|
||||
auth.NextRetryAfter = now.Add(1 * time.Minute)
|
||||
|
||||
@@ -180,6 +180,34 @@ func (e *authFallbackExecutor) StreamCalls() []string {
|
||||
return out
|
||||
}
|
||||
|
||||
type retryAfterStatusError struct {
|
||||
status int
|
||||
message string
|
||||
retryAfter time.Duration
|
||||
}
|
||||
|
||||
func (e *retryAfterStatusError) Error() string {
|
||||
if e == nil {
|
||||
return ""
|
||||
}
|
||||
return e.message
|
||||
}
|
||||
|
||||
func (e *retryAfterStatusError) StatusCode() int {
|
||||
if e == nil {
|
||||
return 0
|
||||
}
|
||||
return e.status
|
||||
}
|
||||
|
||||
func (e *retryAfterStatusError) RetryAfter() *time.Duration {
|
||||
if e == nil {
|
||||
return nil
|
||||
}
|
||||
d := e.retryAfter
|
||||
return &d
|
||||
}
|
||||
|
||||
func newCredentialRetryLimitTestManager(t *testing.T, maxRetryCredentials int) (*Manager, *credentialRetryLimitExecutor) {
|
||||
t.Helper()
|
||||
|
||||
@@ -450,6 +478,174 @@ func TestManager_MarkResult_RespectsAuthDisableCoolingOverride(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestManager_MarkResult_RespectsAuthDisableCoolingOverride_On403(t *testing.T) {
|
||||
prev := quotaCooldownDisabled.Load()
|
||||
quotaCooldownDisabled.Store(false)
|
||||
t.Cleanup(func() { quotaCooldownDisabled.Store(prev) })
|
||||
|
||||
m := NewManager(nil, nil, nil)
|
||||
|
||||
auth := &Auth{
|
||||
ID: "auth-403",
|
||||
Provider: "claude",
|
||||
Metadata: map[string]any{
|
||||
"disable_cooling": true,
|
||||
},
|
||||
}
|
||||
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
|
||||
t.Fatalf("register auth: %v", errRegister)
|
||||
}
|
||||
|
||||
model := "test-model-403"
|
||||
reg := registry.GetGlobalRegistry()
|
||||
reg.RegisterClient(auth.ID, "claude", []*registry.ModelInfo{{ID: model}})
|
||||
t.Cleanup(func() { reg.UnregisterClient(auth.ID) })
|
||||
|
||||
m.MarkResult(context.Background(), Result{
|
||||
AuthID: auth.ID,
|
||||
Provider: "claude",
|
||||
Model: model,
|
||||
Success: false,
|
||||
Error: &Error{HTTPStatus: http.StatusForbidden, Message: "forbidden"},
|
||||
})
|
||||
|
||||
updated, ok := m.GetByID(auth.ID)
|
||||
if !ok || updated == nil {
|
||||
t.Fatalf("expected auth to be present")
|
||||
}
|
||||
state := updated.ModelStates[model]
|
||||
if state == nil {
|
||||
t.Fatalf("expected model state to be present")
|
||||
}
|
||||
if !state.NextRetryAfter.IsZero() {
|
||||
t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", state.NextRetryAfter)
|
||||
}
|
||||
|
||||
if count := reg.GetModelCount(model); count <= 0 {
|
||||
t.Fatalf("expected model count > 0 when disable_cooling=true, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestManager_Execute_DisableCooling_DoesNotBlackoutAfter403(t *testing.T) {
|
||||
prev := quotaCooldownDisabled.Load()
|
||||
quotaCooldownDisabled.Store(false)
|
||||
t.Cleanup(func() { quotaCooldownDisabled.Store(prev) })
|
||||
|
||||
m := NewManager(nil, nil, nil)
|
||||
executor := &authFallbackExecutor{
|
||||
id: "claude",
|
||||
executeErrors: map[string]error{
|
||||
"auth-403-exec": &Error{
|
||||
HTTPStatus: http.StatusForbidden,
|
||||
Message: "forbidden",
|
||||
},
|
||||
},
|
||||
}
|
||||
m.RegisterExecutor(executor)
|
||||
|
||||
auth := &Auth{
|
||||
ID: "auth-403-exec",
|
||||
Provider: "claude",
|
||||
Metadata: map[string]any{
|
||||
"disable_cooling": true,
|
||||
},
|
||||
}
|
||||
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
|
||||
t.Fatalf("register auth: %v", errRegister)
|
||||
}
|
||||
|
||||
model := "test-model-403-exec"
|
||||
reg := registry.GetGlobalRegistry()
|
||||
reg.RegisterClient(auth.ID, "claude", []*registry.ModelInfo{{ID: model}})
|
||||
t.Cleanup(func() { reg.UnregisterClient(auth.ID) })
|
||||
|
||||
req := cliproxyexecutor.Request{Model: model}
|
||||
_, errExecute1 := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{})
|
||||
if errExecute1 == nil {
|
||||
t.Fatal("expected first execute error")
|
||||
}
|
||||
if statusCodeFromError(errExecute1) != http.StatusForbidden {
|
||||
t.Fatalf("first execute status = %d, want %d", statusCodeFromError(errExecute1), http.StatusForbidden)
|
||||
}
|
||||
|
||||
_, errExecute2 := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{})
|
||||
if errExecute2 == nil {
|
||||
t.Fatal("expected second execute error")
|
||||
}
|
||||
if statusCodeFromError(errExecute2) != http.StatusForbidden {
|
||||
t.Fatalf("second execute status = %d, want %d", statusCodeFromError(errExecute2), http.StatusForbidden)
|
||||
}
|
||||
}
|
||||
|
||||
func TestManager_Execute_DisableCooling_DoesNotBlackoutAfter429RetryAfter(t *testing.T) {
|
||||
prev := quotaCooldownDisabled.Load()
|
||||
quotaCooldownDisabled.Store(false)
|
||||
t.Cleanup(func() { quotaCooldownDisabled.Store(prev) })
|
||||
|
||||
m := NewManager(nil, nil, nil)
|
||||
executor := &authFallbackExecutor{
|
||||
id: "claude",
|
||||
executeErrors: map[string]error{
|
||||
"auth-429-exec": &retryAfterStatusError{
|
||||
status: http.StatusTooManyRequests,
|
||||
message: "quota exhausted",
|
||||
retryAfter: 2 * time.Minute,
|
||||
},
|
||||
},
|
||||
}
|
||||
m.RegisterExecutor(executor)
|
||||
|
||||
auth := &Auth{
|
||||
ID: "auth-429-exec",
|
||||
Provider: "claude",
|
||||
Metadata: map[string]any{
|
||||
"disable_cooling": true,
|
||||
},
|
||||
}
|
||||
if _, errRegister := m.Register(context.Background(), auth); errRegister != nil {
|
||||
t.Fatalf("register auth: %v", errRegister)
|
||||
}
|
||||
|
||||
model := "test-model-429-exec"
|
||||
reg := registry.GetGlobalRegistry()
|
||||
reg.RegisterClient(auth.ID, "claude", []*registry.ModelInfo{{ID: model}})
|
||||
t.Cleanup(func() { reg.UnregisterClient(auth.ID) })
|
||||
|
||||
req := cliproxyexecutor.Request{Model: model}
|
||||
_, errExecute1 := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{})
|
||||
if errExecute1 == nil {
|
||||
t.Fatal("expected first execute error")
|
||||
}
|
||||
if statusCodeFromError(errExecute1) != http.StatusTooManyRequests {
|
||||
t.Fatalf("first execute status = %d, want %d", statusCodeFromError(errExecute1), http.StatusTooManyRequests)
|
||||
}
|
||||
|
||||
_, errExecute2 := m.Execute(context.Background(), []string{"claude"}, req, cliproxyexecutor.Options{})
|
||||
if errExecute2 == nil {
|
||||
t.Fatal("expected second execute error")
|
||||
}
|
||||
if statusCodeFromError(errExecute2) != http.StatusTooManyRequests {
|
||||
t.Fatalf("second execute status = %d, want %d", statusCodeFromError(errExecute2), http.StatusTooManyRequests)
|
||||
}
|
||||
|
||||
calls := executor.ExecuteCalls()
|
||||
if len(calls) != 2 {
|
||||
t.Fatalf("execute calls = %d, want 2", len(calls))
|
||||
}
|
||||
|
||||
updated, ok := m.GetByID(auth.ID)
|
||||
if !ok || updated == nil {
|
||||
t.Fatalf("expected auth to be present")
|
||||
}
|
||||
state := updated.ModelStates[model]
|
||||
if state == nil {
|
||||
t.Fatalf("expected model state to be present")
|
||||
}
|
||||
if !state.NextRetryAfter.IsZero() {
|
||||
t.Fatalf("expected NextRetryAfter to be zero when disable_cooling=true, got %v", state.NextRetryAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestManager_MarkResult_RequestScopedNotFoundDoesNotCooldownAuth(t *testing.T) {
|
||||
m := NewManager(nil, nil, nil)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user