fix(claude): add interleaved-thinking beta header, AMP gzip error decoding, normalizeClaudeBudget max_tokens
1. Ensure the interleaved-thinking-2025-05-14 beta header is always sent (it is appended whenever no interleaved-thinking beta is already present) so that thinking blocks are returned correctly for all Claude models.
2. Remove the status-code guard in the AMP reverse proxy's ModifyResponse so that error responses (4xx/5xx) whose bodies are gzip-compressed without a Content-Encoding header are also decoded, preventing garbled error messages from reaching the client.
3. In normalizeClaudeBudget, when enforcing the max_tokens constraint would push the thinking budget below the model minimum, set max_tokens to budgetTokens+1 instead of leaving the request unchanged (which causes a 400 error from the API).
This commit is contained in:
@@ -108,11 +108,6 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
|
|||||||
// Modify incoming responses to handle gzip without Content-Encoding
|
// Modify incoming responses to handle gzip without Content-Encoding
|
||||||
// This addresses the same issue as inline handler gzip handling, but at the proxy level
|
// This addresses the same issue as inline handler gzip handling, but at the proxy level
|
||||||
proxy.ModifyResponse = func(resp *http.Response) error {
|
proxy.ModifyResponse = func(resp *http.Response) error {
|
||||||
// Only process successful responses
|
|
||||||
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip if already marked as gzip (Content-Encoding set)
|
// Skip if already marked as gzip (Content-Encoding set)
|
||||||
if resp.Header.Get("Content-Encoding") != "" {
|
if resp.Header.Get("Content-Encoding") != "" {
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -832,6 +832,9 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
|
|||||||
baseBetas += ",oauth-2025-04-20"
|
baseBetas += ",oauth-2025-04-20"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if !strings.Contains(baseBetas, "interleaved-thinking") {
|
||||||
|
baseBetas += ",interleaved-thinking-2025-05-14"
|
||||||
|
}
|
||||||
|
|
||||||
hasClaude1MHeader := false
|
hasClaude1MHeader := false
|
||||||
if ginHeaders != nil {
|
if ginHeaders != nil {
|
||||||
|
|||||||
@@ -194,7 +194,9 @@ func (a *Applier) normalizeClaudeBudget(body []byte, budgetTokens int, modelInfo
|
|||||||
}
|
}
|
||||||
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
|
if minBudget > 0 && adjustedBudget > 0 && adjustedBudget < minBudget {
|
||||||
// If enforcing the max_tokens constraint would push the budget below the model minimum,
|
// If enforcing the max_tokens constraint would push the budget below the model minimum,
|
||||||
// leave the request unchanged.
|
// increase max_tokens to accommodate the original budget instead of leaving the
|
||||||
|
// request unchanged (which would cause a 400 error from the API).
|
||||||
|
body, _ = sjson.SetBytes(body, "max_tokens", budgetTokens+1)
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user