fix(claude): clamp max_tokens to model limit in normalizeClaudeBudget

When adjustedBudget < minBudget, the previous fix blindly set
max_tokens = budgetTokens+1 which could exceed MaxCompletionTokens.

Now: cap max_tokens at MaxCompletionTokens, recalculate budget, and
disable thinking entirely if constraints are unsatisfiable.

Add unit tests covering raise, clamp, disable, and no-op scenarios.
This commit is contained in:
Blue-B
2026-03-09 22:10:30 +09:00
parent 07d6689d87
commit 5f58248016
2 changed files with 123 additions and 5 deletions
+24 -5
View File
@@ -174,7 +174,8 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
// Ensure the request satisfies Claude constraints:
// 1) Determine effective max_tokens (request overrides model default)
// 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
// 3) If the adjusted budget falls below the model minimum, leave the request unchanged
// 3) If the adjusted budget falls below the model minimum, try raising max_tokens
// (clamped to MaxCompletionTokens); disable thinking if constraints are unsatisfiable
// 4) If max_tokens came from model default, write it back into the request
effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
@@ -193,10 +194,28 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
minBudget = modelInfo.Thinking.Min
}
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
// If enforcing the max_tokens constraint would push the budget below the model minimum,
// increase max_tokens to accommodate the original budget instead of leaving the
// request unchanged (which would cause a 400 error from the API).
body, _ = sjson.SetBytes(body, "max_tokens", budgetTokens+1)
// Enforcing budget_tokens < max_tokens pushed the budget below the model minimum.
// Try raising max_tokens to fit the original budget.
needed := budgetTokens + 1
maxAllowed := 0
if modelInfo != nil {
maxAllowed = modelInfo.MaxCompletionTokens
}
if maxAllowed > 0 && needed > maxAllowed {
// Cannot use original budget; cap max_tokens at model limit.
needed = maxAllowed
}
cappedBudget := needed - 1
if cappedBudget < minBudget {
// Impossible to satisfy both budget >= minBudget and budget < max_tokens
// within the model's completion limit. Disable thinking entirely.
body, _ = sjson.DeleteBytes(body, "thinking")
return body
}
body, _ = sjson.SetBytes(body, "max_tokens", needed)
if cappedBudget != budgetTokens {
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", cappedBudget)
}
return body
}