Merge pull request #1940 from Blue-B/fix/claude-interleaved-thinking-amp-gzip-budget
fix(claude): enable interleaved-thinking beta, decode AMP error gzip, fix budget 400
This commit is contained in:
@@ -108,11 +108,6 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
|
|||||||
// Modify incoming responses to handle gzip without Content-Encoding
|
// Modify incoming responses to handle gzip without Content-Encoding
|
||||||
// This addresses the same issue as inline handler gzip handling, but at the proxy level
|
// This addresses the same issue as inline handler gzip handling, but at the proxy level
|
||||||
proxy.ModifyResponse = func(resp *http.Response) error {
|
proxy.ModifyResponse = func(resp *http.Response) error {
|
||||||
// Only process successful responses
|
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip if already marked as gzip (Content-Encoding set)
|
// Skip if already marked as gzip (Content-Encoding set)
|
||||||
if resp.Header.Get("Content-Encoding") != "" {
|
if resp.Header.Get("Content-Encoding") != "" {
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -840,6 +840,9 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
|
|||||||
baseBetas += ",oauth-2025-04-20"
|
baseBetas += ",oauth-2025-04-20"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if !strings.Contains(baseBetas, "interleaved-thinking") {
|
||||||
|
baseBetas += ",interleaved-thinking-2025-05-14"
|
||||||
|
}
|
||||||
|
|
||||||
hasClaude1MHeader := false
|
hasClaude1MHeader := false
|
||||||
if ginHeaders != nil {
|
if ginHeaders != nil {
|
||||||
|
|||||||
@@ -174,7 +174,8 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
|
|||||||
// Ensure the request satisfies Claude constraints:
|
// Ensure the request satisfies Claude constraints:
|
||||||
// 1) Determine effective max_tokens (request overrides model default)
|
// 1) Determine effective max_tokens (request overrides model default)
|
||||||
// 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
|
// 2) If budget_tokens >= max_tokens, reduce budget_tokens to max_tokens-1
|
||||||
// 3) If the adjusted budget falls below the model minimum, leave the request unchanged
|
// 3) If the adjusted budget falls below the model minimum, try raising max_tokens
|
||||||
|
// (clamped to MaxCompletionTokens); disable thinking if constraints are unsatisfiable
|
||||||
// 4) If max_tokens came from model default, write it back into the request
|
// 4) If max_tokens came from model default, write it back into the request
|
||||||
|
|
||||||
effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
|
effectiveMax, setDefaultMax := a.effectiveMaxTokens(body, modelInfo)
|
||||||
@@ -193,8 +194,28 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
|
|||||||
minBudget = modelInfo.Thinking.Min
|
minBudget = modelInfo.Thinking.Min
|
||||||
}
|
}
|
||||||
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
|
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
|
||||||
// If enforcing the max_tokens constraint would push the budget below the model minimum,
|
// Enforcing budget_tokens < max_tokens pushed the budget below the model minimum.
|
||||||
// leave the request unchanged.
|
// Try raising max_tokens to fit the original budget.
|
||||||
|
needed := budgetTokens + 1
|
||||||
|
maxAllowed := 0
|
||||||
|
if modelInfo != nil {
|
||||||
|
maxAllowed = modelInfo.MaxCompletionTokens
|
||||||
|
}
|
||||||
|
if maxAllowed > 0 && needed > maxAllowed {
|
||||||
|
// Cannot use original budget; cap max_tokens at model limit.
|
||||||
|
needed = maxAllowed
|
||||||
|
}
|
||||||
|
cappedBudget := needed - 1
|
||||||
|
if cappedBudget < minBudget {
|
||||||
|
// Impossible to satisfy both budget >= minBudget and budget < max_tokens
|
||||||
|
// within the model's completion limit. Disable thinking entirely.
|
||||||
|
body, _ = sjson.DeleteBytes(body, "thinking")
|
||||||
|
return body
|
||||||
|
}
|
||||||
|
body, _ = sjson.SetBytes(body, "max_tokens", needed)
|
||||||
|
if cappedBudget != budgetTokens {
|
||||||
|
body, _ = sjson.SetBytes(body, "thinking.budget_tokens", cappedBudget)
|
||||||
|
}
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,99 @@
|
|||||||
|
package claude
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
|
||||||
|
"github.com/tidwall/gjson"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNormalizeClaudeBudget_RaisesMaxTokens(t *testing.T) {
|
||||||
|
a := &Applier{}
|
||||||
|
modelInfo := ®istry.ModelInfo{
|
||||||
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||||
|
}
|
||||||
|
body := []byte(`{"max_tokens":1000,"thinking":{"type":"enabled","budget_tokens":5000}}`)
|
||||||
|
|
||||||
|
out := a.normalizeClaudeBudget(body, 5000, modelInfo)
|
||||||
|
|
||||||
|
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||||
|
if maxTok != 5001 {
|
||||||
|
t.Fatalf("max_tokens = %d, want 5001, body=%s", maxTok, string(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeBudget_ClampsToModelMax(t *testing.T) {
|
||||||
|
a := &Applier{}
|
||||||
|
modelInfo := ®istry.ModelInfo{
|
||||||
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||||
|
}
|
||||||
|
body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":200000}}`)
|
||||||
|
|
||||||
|
out := a.normalizeClaudeBudget(body, 200000, modelInfo)
|
||||||
|
|
||||||
|
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||||
|
if maxTok != 64000 {
|
||||||
|
t.Fatalf("max_tokens = %d, want 64000 (capped to model limit), body=%s", maxTok, string(out))
|
||||||
|
}
|
||||||
|
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
|
||||||
|
if budget != 63999 {
|
||||||
|
t.Fatalf("budget_tokens = %d, want 63999 (max_tokens-1), body=%s", budget, string(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeBudget_DisablesThinkingWhenUnsatisfiable(t *testing.T) {
|
||||||
|
a := &Applier{}
|
||||||
|
modelInfo := ®istry.ModelInfo{
|
||||||
|
MaxCompletionTokens: 1000,
|
||||||
|
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||||
|
}
|
||||||
|
body := []byte(`{"max_tokens":500,"thinking":{"type":"enabled","budget_tokens":2000}}`)
|
||||||
|
|
||||||
|
out := a.normalizeClaudeBudget(body, 2000, modelInfo)
|
||||||
|
|
||||||
|
if gjson.GetBytes(out, "thinking").Exists() {
|
||||||
|
t.Fatalf("thinking should be removed when constraints are unsatisfiable, body=%s", string(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeBudget_NoClamping(t *testing.T) {
|
||||||
|
a := &Applier{}
|
||||||
|
modelInfo := ®istry.ModelInfo{
|
||||||
|
MaxCompletionTokens: 64000,
|
||||||
|
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||||
|
}
|
||||||
|
body := []byte(`{"max_tokens":32000,"thinking":{"type":"enabled","budget_tokens":16000}}`)
|
||||||
|
|
||||||
|
out := a.normalizeClaudeBudget(body, 16000, modelInfo)
|
||||||
|
|
||||||
|
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||||
|
if maxTok != 32000 {
|
||||||
|
t.Fatalf("max_tokens should remain 32000, got %d, body=%s", maxTok, string(out))
|
||||||
|
}
|
||||||
|
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
|
||||||
|
if budget != 16000 {
|
||||||
|
t.Fatalf("budget_tokens should remain 16000, got %d, body=%s", budget, string(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNormalizeClaudeBudget_AdjustsBudgetToMaxMinus1(t *testing.T) {
|
||||||
|
a := &Applier{}
|
||||||
|
modelInfo := ®istry.ModelInfo{
|
||||||
|
MaxCompletionTokens: 8192,
|
||||||
|
Thinking: ®istry.ThinkingSupport{Min: 1024, Max: 128000},
|
||||||
|
}
|
||||||
|
body := []byte(`{"max_tokens":8192,"thinking":{"type":"enabled","budget_tokens":10000}}`)
|
||||||
|
|
||||||
|
out := a.normalizeClaudeBudget(body, 10000, modelInfo)
|
||||||
|
|
||||||
|
maxTok := gjson.GetBytes(out, "max_tokens").Int()
|
||||||
|
if maxTok != 8192 {
|
||||||
|
t.Fatalf("max_tokens = %d, want 8192 (unchanged), body=%s", maxTok, string(out))
|
||||||
|
}
|
||||||
|
budget := gjson.GetBytes(out, "thinking.budget_tokens").Int()
|
||||||
|
if budget != 8191 {
|
||||||
|
t.Fatalf("budget_tokens = %d, want 8191 (max_tokens-1), body=%s", budget, string(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user