diff --git a/config.example.yaml b/config.example.yaml index dfd7454b..31f16973 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -100,7 +100,7 @@ ws-auth: false # excluded-models: # - "claude-opus-4-5-20251101" # exclude specific models (exact match) # - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219) -# - "*-think" # wildcard matching suffix (e.g. claude-opus-4-5-thinking) +# - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking) # - "*haiku*" # wildcard matching substring (e.g. claude-3-5-haiku-20241022) # OpenAI compatibility providers diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 2f87f195..adaff867 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.5 Haiku", ContextLength: 200000, MaxCompletionTokens: 64000, + // Thinking: not supported for Haiku models }, { ID: "claude-sonnet-4-5-20250929", @@ -49,6 +50,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.1 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-opus-4-20250514", @@ -59,6 +61,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-sonnet-4-20250514", @@ -69,6 +72,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Sonnet", ContextLength: 200000, MaxCompletionTokens: 64000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-3-7-sonnet-20250219", @@ -79,6 +83,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 3.7 Sonnet", ContextLength: 128000, MaxCompletionTokens: 8192, + 
Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-3-5-haiku-20241022", @@ -89,6 +94,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 3.5 Haiku", ContextLength: 128000, MaxCompletionTokens: 8192, + // Thinking: not supported for Haiku models }, } } @@ -476,6 +482,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}}, }, { ID: "gpt-5-codex", @@ -489,6 +496,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5-codex-mini", @@ -502,6 +510,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5.1", @@ -515,6 +524,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}}, }, { ID: "gpt-5.1-codex", @@ -528,6 +538,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5.1-codex-mini", @@ -541,6 +552,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5.1-codex-max", @@ -554,6 +566,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + 
Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}, }, } } @@ -610,6 +623,7 @@ func GetIFlowModels() []*ModelInfo { DisplayName string Description string Created int64 + Thinking *ThinkingSupport }{ {ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600}, {ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800}, @@ -619,17 +633,17 @@ func GetIFlowModels() []*ModelInfo { {ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400}, {ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400}, {ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000}, - {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200}, + {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, {ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000}, {ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000}, {ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200}, - {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200}, + {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, {ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200}, {ID: "qwen3-32b", 
DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400}, - {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600}, + {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, {ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600}, {ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600}, - {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000}, + {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, } models := make([]*ModelInfo, 0, len(entries)) for _, entry := range entries { @@ -641,6 +655,7 @@ func GetIFlowModels() []*ModelInfo { Type: "iflow", DisplayName: entry.DisplayName, Description: entry.Description, + Thinking: entry.Thinking, }) } return models diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index 5ef9007f..f3517bde 100644 --- a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -63,6 +63,9 @@ type ThinkingSupport struct { ZeroAllowed bool `json:"zero_allowed,omitempty"` // DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget). DynamicAllowed bool `json:"dynamic_allowed,omitempty"` + // Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high"). + // When set, the model uses level-based reasoning instead of token budgets. 
+ Levels []string `json:"levels,omitempty"` } // ModelRegistration tracks a model's availability diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index c7470954..6af08608 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -450,59 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) { return betas, body } -// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes. +// injectThinkingConfig adds thinking configuration based on metadata using the unified flow. +// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata +// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini. func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte { - // Only inject if thinking config is not already present - if gjson.GetBytes(body, "thinking").Exists() { + budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata) + if !ok { return body } - - budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata) - if !ok || budgetTokens <= 0 { - return body - } - - body, _ = sjson.SetBytes(body, "thinking.type", "enabled") - body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens) - return body -} - -func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) { - budget, include, effort, matched := util.ThinkingFromMetadata(metadata) - if matched { - if include != nil && !*include { - return 0, false - } - if budget != nil { - normalized := util.NormalizeThinkingBudget(modelName, *budget) - if normalized > 0 { - return normalized, true - } - return 0, false - } - if effort != nil { - if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 { - return derived, true - } - } - } - return claudeBudgetFromSuffix(modelName) -} - 
-func claudeBudgetFromSuffix(modelName string) (int, bool) { - lower := strings.ToLower(strings.TrimSpace(modelName)) - switch { - case strings.HasSuffix(lower, "-thinking-low"): - return 1024, true - case strings.HasSuffix(lower, "-thinking-medium"): - return 8192, true - case strings.HasSuffix(lower, "-thinking-high"): - return 24576, true - case strings.HasSuffix(lower, "-thinking"): - return 8192, true - default: - return 0, false - } + return util.ApplyClaudeThinkingConfig(body, budget) } // ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled. diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 46a30177..b9470b3c 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -54,7 +54,11 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return resp, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.SetBytes(body, "stream", true) @@ -148,7 +152,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") + body = 
normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return nil, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "model", upstreamModel) @@ -246,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth modelForCounting := req.Model - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index c68a6431..d1a69812 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -57,10 +57,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return resp, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -143,10 
+148,15 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return nil, errValidate + } // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. toolsResult := gjson.GetBytes(body, "tools") if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 93122c20..68b2963a 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -58,10 +58,15 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) 
} + translated = normalizeThinkingConfig(translated, upstreamModel) + if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { + return resp, errValidate + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -147,10 +152,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } + translated = normalizeThinkingConfig(translated, upstreamModel) + if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { + return nil, errValidate + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 37e3141a..be249868 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -1,6 +1,8 @@ package executor import ( + "fmt" + "net/http" "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" @@ -9,7 +11,7 @@ import ( "github.com/tidwall/sjson" ) -// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N) +// applyThinkingMetadata applies thinking config from 
model suffix metadata (e.g., [high], [8192]) // for standard Gemini format payloads. It normalizes the budget when the model supports thinking. func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) @@ -26,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) } -// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N) +// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., [high], [8192]) // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) @@ -43,40 +45,21 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) } -// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata. -// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking. -func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte { +// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. +// Metadata values take precedence over any existing field when the model supports thinking, intentionally +// overwriting caller-provided values to honor suffix/default metadata priority. 
+func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte { if len(metadata) == 0 { return payload } if !util.ModelSupportsThinking(model) { return payload } - if gjson.GetBytes(payload, "reasoning.effort").Exists() { + if field == "" { return payload } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil { - return updated - } - } - return payload -} - -// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field) -// when present in metadata. It avoids overwriting an existing reasoning_effort field. -func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte { - if len(metadata) == 0 { - return payload - } - if !util.ModelSupportsThinking(model) { - return payload - } - if gjson.GetBytes(payload, "reasoning_effort").Exists() { - return payload - } - if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil { + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } } @@ -232,3 +215,93 @@ func matchModelPattern(pattern, model string) bool { } return pi == len(pattern) } + +// normalizeThinkingConfig normalizes thinking-related fields in the payload +// based on model capabilities. For models without thinking support, it strips +// reasoning fields. For models with level-based thinking, it validates and +// normalizes the reasoning effort level. 
+func normalizeThinkingConfig(payload []byte, model string) []byte { + if len(payload) == 0 || model == "" { + return payload + } + + if !util.ModelSupportsThinking(model) { + return stripThinkingFields(payload) + } + + if util.ModelUsesThinkingLevels(model) { + return normalizeReasoningEffortLevel(payload, model) + } + + return payload +} + +// stripThinkingFields removes thinking-related fields from the payload for +// models that do not support thinking. +func stripThinkingFields(payload []byte) []byte { + fieldsToRemove := []string{ + "reasoning", + "reasoning_effort", + "reasoning.effort", + } + out := payload + for _, field := range fieldsToRemove { + if gjson.GetBytes(out, field).Exists() { + out, _ = sjson.DeleteBytes(out, field) + } + } + return out +} + +// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort +// or reasoning.effort field for level-based thinking models. +func normalizeReasoningEffortLevel(payload []byte, model string) []byte { + out := payload + + if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() { + if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { + out, _ = sjson.SetBytes(out, "reasoning_effort", normalized) + } + } + + if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() { + if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { + out, _ = sjson.SetBytes(out, "reasoning.effort", normalized) + } + } + + return out +} + +// validateThinkingConfig checks for unsupported reasoning levels on level-based models. +// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently +// downgrading requests. 
+func validateThinkingConfig(payload []byte, model string) error { + if len(payload) == 0 || model == "" { + return nil + } + if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) { + return nil + } + + levels := util.GetModelThinkingLevels(model) + checkField := func(path string) error { + if effort := gjson.GetBytes(payload, path); effort.Exists() { + if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok { + return statusErr{ + code: http.StatusBadRequest, + msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")), + } + } + } + return nil + } + + if err := checkField("reasoning_effort"); err != nil { + return err + } + if err := checkField("reasoning.effort"); err != nil { + return err + } + return nil +} diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index f060cb61..2b8d0e50 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -51,10 +51,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return resp, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) url := 
strings.TrimSuffix(baseURL, "/") + "/chat/completions" @@ -126,10 +131,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return nil, errValidate + } toolsResult := gjson.GetBytes(body, "tools") // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response. // This will have no real consequences. It's just to scare Qwen3. diff --git a/internal/util/claude_thinking.go b/internal/util/claude_thinking.go new file mode 100644 index 00000000..b0c5a0a2 --- /dev/null +++ b/internal/util/claude_thinking.go @@ -0,0 +1,46 @@ +package util + +import ( + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload. +// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget. +// If budget is nil or the payload already has thinking config, it returns the payload unchanged. 
+func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte { + if budget == nil { + return body + } + if gjson.GetBytes(body, "thinking").Exists() { + return body + } + if *budget <= 0 { + return body + } + updated := body + updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled") + updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget) + return updated +} + +// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models. +// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget. +// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched. +func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) { + budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata) + if !matched { + return nil, false + } + if include != nil && !*include { + return nil, true + } + if budget == nil { + return nil, true + } + normalized := NormalizeThinkingBudget(modelName, *budget) + if normalized <= 0 { + return nil, true + } + return &normalized, true +} diff --git a/internal/util/thinking.go b/internal/util/thinking.go index c16b91cd..9671f20b 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -1,6 +1,8 @@ package util import ( + "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" ) @@ -67,3 +69,39 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero } return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed } + +// GetModelThinkingLevels returns the discrete reasoning effort levels for the model. +// Returns nil if the model has no thinking support or no levels defined. 
+func GetModelThinkingLevels(model string) []string { + if model == "" { + return nil + } + info := registry.GetGlobalRegistry().GetModelInfo(model) + if info == nil || info.Thinking == nil { + return nil + } + return info.Thinking.Levels +} + +// ModelUsesThinkingLevels reports whether the model uses discrete reasoning +// effort levels instead of numeric budgets. +func ModelUsesThinkingLevels(model string) bool { + levels := GetModelThinkingLevels(model) + return len(levels) > 0 +} + +// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort +// level for the given model. Returns false when the level is not supported. +func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { + levels := GetModelThinkingLevels(model) + if len(levels) == 0 { + return "", false + } + loweredEffort := strings.ToLower(strings.TrimSpace(effort)) + for _, lvl := range levels { + if strings.ToLower(lvl) == loweredEffort { + return lvl, true + } + } + return "", false +} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index e3fd9136..c9a68534 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -14,61 +14,59 @@ const ( ) // NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns -// the normalized base model with extracted metadata. Supported patterns: -// - "-thinking-" extracts a numeric budget -// - "-thinking-" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none) -// - "-thinking" maps to a default reasoning effort of "medium" -// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true -// - "-nothinking" maps to budget=0 and include_thoughts=false +// the normalized base model with extracted metadata. 
Supported pattern: +// - "[<value>]" where value can be: +// - A numeric budget (e.g., "[8192]", "[16384]") +// - A reasoning effort level (e.g., "[high]", "[medium]", "[low]") +// +// Examples: +// - "claude-sonnet-4-5-20250929[16384]" → budget=16384 +// - "gpt-5.1[high]" → reasoning_effort="high" +// - "gemini-2.5-pro[32768]" → budget=32768 +// +// Note: Empty brackets "[]" are not supported and will be ignored. func NormalizeThinkingModel(modelName string) (string, map[string]any) { if modelName == "" { return modelName, nil } - lower := strings.ToLower(modelName) baseModel := modelName var ( budgetOverride *int - includeThoughts *bool reasoningEffort *string matched bool ) - switch { - case strings.HasSuffix(lower, "-nothinking"): - baseModel = modelName[:len(modelName)-len("-nothinking")] - budget := 0 - include := false - budgetOverride = &budget - includeThoughts = &include - matched = true - case strings.HasSuffix(lower, "-reasoning"): - baseModel = modelName[:len(modelName)-len("-reasoning")] - budget := -1 - include := true - budgetOverride = &budget - includeThoughts = &include - matched = true - default: - if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 { - value := modelName[idx+len("-thinking-"):] - if value != "" { - if parsed, ok := parseIntPrefix(value); ok { - baseModel = modelName[:idx] - budgetOverride = &parsed - matched = true - } else if effort, okEffort := normalizeReasoningEffort(value); okEffort { - baseModel = modelName[:idx] - reasoningEffort = &effort - matched = true - } - } - } else if strings.HasSuffix(lower, "-thinking") { - baseModel = modelName[:len(modelName)-len("-thinking")] - effort := "medium" - reasoningEffort = &effort + // Match "[value]" pattern at the end of the model name + if idx := strings.LastIndex(modelName, "["); idx != -1 { + if !strings.HasSuffix(modelName, "]") { + // Incomplete bracket, ignore + return baseModel, nil + } + + value := modelName[idx+1 : len(modelName)-1] // Extract content between [ and ] + 
if value == "" { + // Empty brackets not supported + return baseModel, nil + } + + candidateBase := modelName[:idx] + + // Auto-detect: pure numeric → budget, string → reasoning effort level + if parsed, ok := parseIntPrefix(value); ok { + // Numeric value: treat as thinking budget + baseModel = candidateBase + budgetOverride = &parsed matched = true + } else { + // String value: treat as reasoning effort level + baseModel = candidateBase + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + matched = true + } } } @@ -82,9 +80,6 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { if budgetOverride != nil { metadata[ThinkingBudgetMetadataKey] = *budgetOverride } - if includeThoughts != nil { - metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts - } if reasoningEffort != nil { metadata[ReasoningEffortMetadataKey] = *reasoningEffort } @@ -185,7 +180,7 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { return "", false } if effort != nil && *effort != "" { - return *effort, true + return strings.ToLower(strings.TrimSpace(*effort)), true } if budget != nil { switch *budget { @@ -207,7 +202,11 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) { if effort == "" { return 0, false } - switch strings.ToLower(effort) { + normalized, ok := NormalizeReasoningEffortLevel(model, effort) + if !ok { + normalized = strings.ToLower(strings.TrimSpace(effort)) + } + switch normalized { case "none": return 0, true case "auto": @@ -312,16 +311,3 @@ func parseNumberToInt(raw any) (int, bool) { } return 0, false } - -func normalizeReasoningEffort(value string) (string, bool) { - if value == "" { - return "", false - } - effort := strings.ToLower(strings.TrimSpace(value)) - switch effort { - case "minimal", "low", "medium", "high", "xhigh", "auto", "none": - return effort, true - default: - return "", false - } -}