From a03d514095c4f76d7d5bf986bd1e109854e2868f Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 11:28:44 +0800 Subject: [PATCH 01/11] feat(registry): add thinking metadata for models --- internal/registry/model_definitions.go | 13 +++++++++++++ internal/registry/model_registry.go | 3 +++ 2 files changed, 16 insertions(+) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 2f87f195..9956d964 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.5 Haiku", ContextLength: 200000, MaxCompletionTokens: 64000, + // Thinking: not supported for Haiku models }, { ID: "claude-sonnet-4-5-20250929", @@ -49,6 +50,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4.1 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-opus-4-20250514", @@ -59,6 +61,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Opus", ContextLength: 200000, MaxCompletionTokens: 32000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-sonnet-4-20250514", @@ -69,6 +72,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 4 Sonnet", ContextLength: 200000, MaxCompletionTokens: 64000, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-3-7-sonnet-20250219", @@ -79,6 +83,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 3.7 Sonnet", ContextLength: 128000, MaxCompletionTokens: 8192, + Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true}, }, { ID: "claude-3-5-haiku-20241022", @@ -89,6 +94,7 @@ func GetClaudeModels() []*ModelInfo { DisplayName: "Claude 3.5 Haiku", 
ContextLength: 128000, MaxCompletionTokens: 8192, + // Thinking: not supported for Haiku models }, } } @@ -476,6 +482,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}}, }, { ID: "gpt-5-codex", @@ -489,6 +496,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5-codex-mini", @@ -502,6 +510,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5.1", @@ -515,6 +524,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}}, }, { ID: "gpt-5.1-codex", @@ -528,6 +538,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5.1-codex-mini", @@ -541,6 +552,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}, }, { ID: "gpt-5.1-codex-max", @@ -554,6 +566,7 @@ func GetOpenAIModels() []*ModelInfo { ContextLength: 400000, MaxCompletionTokens: 128000, SupportedParameters: []string{"tools"}, + Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}}, }, } } diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go index 5ef9007f..f3517bde 100644 --- 
a/internal/registry/model_registry.go +++ b/internal/registry/model_registry.go @@ -63,6 +63,9 @@ type ThinkingSupport struct { ZeroAllowed bool `json:"zero_allowed,omitempty"` // DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget). DynamicAllowed bool `json:"dynamic_allowed,omitempty"` + // Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high"). + // When set, the model uses level-based reasoning instead of token budgets. + Levels []string `json:"levels,omitempty"` } // ModelRegistration tracks a model's availability From 3ffd120ae9e9ce2bf34cc87c9994150ec4474ff6 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 11:51:33 +0800 Subject: [PATCH 02/11] feat(runtime): add thinking config normalization --- internal/runtime/executor/codex_executor.go | 2 + .../executor/openai_compat_executor.go | 8 ++- internal/runtime/executor/payload_helpers.go | 57 +++++++++++++++++++ internal/util/thinking.go | 46 +++++++++++++++ 4 files changed, 111 insertions(+), 2 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 46a30177..3fe5ed6e 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -55,6 +55,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = normalizeThinkingConfig(body, upstreamModel) body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.SetBytes(body, "stream", true) @@ -149,6 +150,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body := sdktranslator.TranslateRequest(from, to, req.Model, 
bytes.Clone(req.Payload), true) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = normalizeThinkingConfig(body, upstreamModel) body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "model", upstreamModel) diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 93122c20..ba47750e 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -59,9 +59,11 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } + translated = normalizeThinkingConfig(translated, upstreamModel) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -148,9 +150,11 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } + translated = normalizeThinkingConfig(translated, upstreamModel) url := 
strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 37e3141a..9d431f11 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -232,3 +232,60 @@ func matchModelPattern(pattern, model string) bool { } return pi == len(pattern) } + +// normalizeThinkingConfig normalizes thinking-related fields in the payload +// based on model capabilities. For models without thinking support, it strips +// reasoning fields. For models with level-based thinking, it validates and +// normalizes the reasoning effort level. +func normalizeThinkingConfig(payload []byte, model string) []byte { + if len(payload) == 0 || model == "" { + return payload + } + + if !util.ModelSupportsThinking(model) { + return stripThinkingFields(payload) + } + + if util.ModelUsesThinkingLevels(model) { + return normalizeReasoningEffortLevel(payload, model) + } + + return payload +} + +// stripThinkingFields removes thinking-related fields from the payload for +// models that do not support thinking. +func stripThinkingFields(payload []byte) []byte { + fieldsToRemove := []string{ + "reasoning", + "reasoning_effort", + "reasoning.effort", + } + out := payload + for _, field := range fieldsToRemove { + if gjson.GetBytes(out, field).Exists() { + out, _ = sjson.DeleteBytes(out, field) + } + } + return out +} + +// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort +// or reasoning.effort field for level-based thinking models. 
+func normalizeReasoningEffortLevel(payload []byte, model string) []byte { + out := payload + + if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() { + if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { + out, _ = sjson.SetBytes(out, "reasoning_effort", normalized) + } + } + + if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() { + if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok { + out, _ = sjson.SetBytes(out, "reasoning.effort", normalized) + } + } + + return out +} diff --git a/internal/util/thinking.go b/internal/util/thinking.go index c16b91cd..37200980 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -1,6 +1,8 @@ package util import ( + "strings" + "github.com/router-for-me/CLIProxyAPI/v6/internal/registry" ) @@ -67,3 +69,47 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero } return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed } + +// GetModelThinkingLevels returns the discrete reasoning effort levels for the model. +// Returns nil if the model has no thinking support or no levels defined. +func GetModelThinkingLevels(model string) []string { + if model == "" { + return nil + } + info := registry.GetGlobalRegistry().GetModelInfo(model) + if info == nil || info.Thinking == nil { + return nil + } + return info.Thinking.Levels +} + +// ModelUsesThinkingLevels reports whether the model uses discrete reasoning +// effort levels instead of numeric budgets. +func ModelUsesThinkingLevels(model string) bool { + levels := GetModelThinkingLevels(model) + return len(levels) > 0 +} + +// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort +// level for the given model. If the level is not supported, it returns the +// first (lowest) level from the model's supported levels. 
+func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { + levels := GetModelThinkingLevels(model) + if len(levels) == 0 { + return "", false + } + loweredEffort := strings.ToLower(strings.TrimSpace(effort)) + for _, lvl := range levels { + if strings.ToLower(lvl) == loweredEffort { + return lvl, true + } + } + return defaultReasoningLevel(levels), true +} + +func defaultReasoningLevel(levels []string) string { + if len(levels) > 0 { + return levels[0] + } + return "" +} From d06d0eab2f12af290453c17d8cb24e595792751a Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:14:51 +0800 Subject: [PATCH 03/11] fix(util): centralize reasoning effort normalization --- internal/util/thinking.go | 42 ++++++++++++++++++++++++++++++++ internal/util/thinking_suffix.go | 26 ++++++++------------ 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 37200980..bcf92c5b 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -113,3 +113,45 @@ func defaultReasoningLevel(levels []string) string { } return "" } + +// standardReasoningEfforts defines the canonical set of reasoning effort levels. +// This serves as the single source of truth for valid effort values. +var standardReasoningEfforts = []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"} + +// IsValidReasoningEffort checks if the given effort string is a valid reasoning effort level. +// This is a registry-independent check against the standard effort levels. +func IsValidReasoningEffort(effort string) bool { + if effort == "" { + return false + } + lowered := strings.ToLower(strings.TrimSpace(effort)) + for _, e := range standardReasoningEfforts { + if e == lowered { + return true + } + } + return false +} + +// NormalizeReasoningEffort normalizes a reasoning effort string to its canonical form. 
+// It first tries registry-based normalization if a model is provided, then falls back +// to the standard effort levels. Returns empty string and false if invalid. +func NormalizeReasoningEffort(model, effort string) (string, bool) { + if effort == "" { + return "", false + } + lowered := strings.ToLower(strings.TrimSpace(effort)) + + if model != "" { + if normalized, ok := NormalizeReasoningEffortLevel(model, effort); ok { + return normalized, true + } + } + + for _, e := range standardReasoningEfforts { + if e == lowered { + return e, true + } + } + return "", false +} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index e3fd9136..1a1a8715 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -58,8 +58,9 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { baseModel = modelName[:idx] budgetOverride = &parsed matched = true - } else if effort, okEffort := normalizeReasoningEffort(value); okEffort { + } else if IsValidReasoningEffort(value) { baseModel = modelName[:idx] + effort := strings.ToLower(strings.TrimSpace(value)) reasoningEffort = &effort matched = true } @@ -185,7 +186,9 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { return "", false } if effort != nil && *effort != "" { - return *effort, true + if IsValidReasoningEffort(*effort) { + return strings.ToLower(strings.TrimSpace(*effort)), true + } } if budget != nil { switch *budget { @@ -207,7 +210,11 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) { if effort == "" { return 0, false } - switch strings.ToLower(effort) { + normalized, ok := NormalizeReasoningEffort(model, effort) + if !ok { + return 0, false + } + switch normalized { case "none": return 0, true case "auto": @@ -312,16 +319,3 @@ func parseNumberToInt(raw any) (int, bool) { } return 0, false } - -func normalizeReasoningEffort(value string) (string, bool) { - if value == "" { - return "", false - } - 
effort := strings.ToLower(strings.TrimSpace(value)) - switch effort { - case "minimal", "low", "medium", "high", "xhigh", "auto", "none": - return effort, true - default: - return "", false - } -} From 169f4295d041b0c2e1089d02073740c36f83e8bf Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:20:12 +0800 Subject: [PATCH 04/11] fix(util): align reasoning effort handling with registry --- internal/util/thinking.go | 42 -------------------------------- internal/util/thinking_suffix.go | 19 ++++++++------- 2 files changed, 10 insertions(+), 51 deletions(-) diff --git a/internal/util/thinking.go b/internal/util/thinking.go index bcf92c5b..37200980 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -113,45 +113,3 @@ func defaultReasoningLevel(levels []string) string { } return "" } - -// standardReasoningEfforts defines the canonical set of reasoning effort levels. -// This serves as the single source of truth for valid effort values. -var standardReasoningEfforts = []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"} - -// IsValidReasoningEffort checks if the given effort string is a valid reasoning effort level. -// This is a registry-independent check against the standard effort levels. -func IsValidReasoningEffort(effort string) bool { - if effort == "" { - return false - } - lowered := strings.ToLower(strings.TrimSpace(effort)) - for _, e := range standardReasoningEfforts { - if e == lowered { - return true - } - } - return false -} - -// NormalizeReasoningEffort normalizes a reasoning effort string to its canonical form. -// It first tries registry-based normalization if a model is provided, then falls back -// to the standard effort levels. Returns empty string and false if invalid. 
-func NormalizeReasoningEffort(model, effort string) (string, bool) { - if effort == "" { - return "", false - } - lowered := strings.ToLower(strings.TrimSpace(effort)) - - if model != "" { - if normalized, ok := NormalizeReasoningEffortLevel(model, effort); ok { - return normalized, true - } - } - - for _, e := range standardReasoningEfforts { - if e == lowered { - return e, true - } - } - return "", false -} diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index 1a1a8715..c2d806ad 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -58,11 +58,14 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { baseModel = modelName[:idx] budgetOverride = &parsed matched = true - } else if IsValidReasoningEffort(value) { + } else { baseModel = modelName[:idx] - effort := strings.ToLower(strings.TrimSpace(value)) - reasoningEffort = &effort - matched = true + if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok { + reasoningEffort = &normalized + matched = true + } else { + baseModel = modelName + } } } } else if strings.HasSuffix(lower, "-thinking") { @@ -186,9 +189,7 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) { return "", false } if effort != nil && *effort != "" { - if IsValidReasoningEffort(*effort) { - return strings.ToLower(strings.TrimSpace(*effort)), true - } + return strings.ToLower(strings.TrimSpace(*effort)), true } if budget != nil { switch *budget { @@ -210,9 +211,9 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) { if effort == "" { return 0, false } - normalized, ok := NormalizeReasoningEffort(model, effort) + normalized, ok := NormalizeReasoningEffortLevel(model, effort) if !ok { - return 0, false + normalized = strings.ToLower(strings.TrimSpace(effort)) } switch normalized { case "none": From 519da2e04222641a412fb5c17a0bc2cf20428800 Mon Sep 17 00:00:00 2001 From: hkfires 
<10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 12:36:54 +0800 Subject: [PATCH 05/11] fix(runtime): validate reasoning effort levels --- internal/runtime/executor/codex_executor.go | 6 ++++ .../executor/openai_compat_executor.go | 6 ++++ internal/runtime/executor/payload_helpers.go | 35 +++++++++++++++++++ internal/util/thinking.go | 12 ++----- 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 3fe5ed6e..7003373f 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -56,6 +56,9 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return resp, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.SetBytes(body, "stream", true) @@ -151,6 +154,9 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return nil, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "model", upstreamModel) diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index ba47750e..507b0fd9 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go 
@@ -64,6 +64,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } translated = normalizeThinkingConfig(translated, upstreamModel) + if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { + return resp, errValidate + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) @@ -155,6 +158,9 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated, _ = sjson.SetBytes(translated, "model", upstreamModel) } translated = normalizeThinkingConfig(translated, upstreamModel) + if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil { + return nil, errValidate + } url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated)) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 9d431f11..5711bbbd 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -1,6 +1,8 @@ package executor import ( + "fmt" + "net/http" "strings" "github.com/router-for-me/CLIProxyAPI/v6/internal/config" @@ -289,3 +291,36 @@ func normalizeReasoningEffortLevel(payload []byte, model string) []byte { return out } + +// validateThinkingConfig checks for unsupported reasoning levels on level-based models. +// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently +// downgrading requests. 
+func validateThinkingConfig(payload []byte, model string) error { + if len(payload) == 0 || model == "" { + return nil + } + if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) { + return nil + } + + levels := util.GetModelThinkingLevels(model) + checkField := func(path string) error { + if effort := gjson.GetBytes(payload, path); effort.Exists() { + if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok { + return statusErr{ + code: http.StatusBadRequest, + msg: fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")), + } + } + } + return nil + } + + if err := checkField("reasoning_effort"); err != nil { + return err + } + if err := checkField("reasoning.effort"); err != nil { + return err + } + return nil +} diff --git a/internal/util/thinking.go b/internal/util/thinking.go index 37200980..9671f20b 100644 --- a/internal/util/thinking.go +++ b/internal/util/thinking.go @@ -91,8 +91,7 @@ func ModelUsesThinkingLevels(model string) bool { } // NormalizeReasoningEffortLevel validates and normalizes a reasoning effort -// level for the given model. If the level is not supported, it returns the -// first (lowest) level from the model's supported levels. +// level for the given model. Returns false when the level is not supported. 
func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { levels := GetModelThinkingLevels(model) if len(levels) == 0 { @@ -104,12 +103,5 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) { return lvl, true } } - return defaultReasoningLevel(levels), true -} - -func defaultReasoningLevel(levels []string) string { - if len(levels) > 0 { - return levels[0] - } - return "" + return "", false } From 3a81ab22fdb6c9b993fac1deef94785f8a8f5dbf Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 14:35:05 +0800 Subject: [PATCH 06/11] fix(runtime): unify reasoning effort metadata overrides --- internal/runtime/executor/codex_executor.go | 6 ++-- internal/runtime/executor/iflow_executor.go | 4 +-- .../executor/openai_compat_executor.go | 4 +-- internal/runtime/executor/payload_helpers.go | 30 +++------------- internal/runtime/executor/qwen_executor.go | 4 +-- internal/util/thinking_suffix.go | 34 ++++++++++++++++--- 6 files changed, 44 insertions(+), 38 deletions(-) diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go index 7003373f..b9470b3c 100644 --- a/internal/runtime/executor/codex_executor.go +++ b/internal/runtime/executor/codex_executor.go @@ -54,7 +54,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body = normalizeThinkingConfig(body, upstreamModel) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return resp, errValidate @@ -152,7 +152,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := 
sdktranslator.FromString("codex") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body = normalizeThinkingConfig(body, upstreamModel) if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { return nil, errValidate @@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth modelForCounting := req.Model - body = applyReasoningEffortMetadata(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort") body, _ = sjson.SetBytes(body, "model", upstreamModel) body, _ = sjson.DeleteBytes(body, "previous_response_id") body, _ = sjson.SetBytes(body, "stream", false) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index c68a6431..a445e47d 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -57,7 +57,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -143,7 +143,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = 
applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go index 507b0fd9..68b2963a 100644 --- a/internal/runtime/executor/openai_compat_executor.go +++ b/internal/runtime/executor/openai_compat_executor.go @@ -58,7 +58,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) @@ -152,7 +152,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy translated = e.overrideModel(translated, modelOverride) } translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated) - translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model) + translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort") upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) if upstreamModel != "" { translated, _ = sjson.SetBytes(translated, "model", upstreamModel) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 5711bbbd..61486d62 100644 --- a/internal/runtime/executor/payload_helpers.go +++ 
b/internal/runtime/executor/payload_helpers.go @@ -45,40 +45,20 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) } -// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata. -// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking. -func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte { +// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. +// Metadata values take precedence over any existing field when the model supports thinking. +func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte { if len(metadata) == 0 { return payload } if !util.ModelSupportsThinking(model) { return payload } - if gjson.GetBytes(payload, "reasoning.effort").Exists() { + if field == "" { return payload } if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil { - return updated - } - } - return payload -} - -// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field) -// when present in metadata. It avoids overwriting an existing reasoning_effort field. 
-func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte { - if len(metadata) == 0 { - return payload - } - if !util.ModelSupportsThinking(model) { - return payload - } - if gjson.GetBytes(payload, "reasoning_effort").Exists() { - return payload - } - if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" { - if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil { + if updated, err := sjson.SetBytes(payload, field, effort); err == nil { return updated } } diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index f060cb61..d25ed5da 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -51,7 +51,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req from := opts.SourceFormat to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } @@ -126,7 +126,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) - body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model) + body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } diff --git a/internal/util/thinking_suffix.go 
b/internal/util/thinking_suffix.go index c2d806ad..47ce42f7 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -55,16 +55,42 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { value := modelName[idx+len("-thinking-"):] if value != "" { if parsed, ok := parseIntPrefix(value); ok { - baseModel = modelName[:idx] - budgetOverride = &parsed - matched = true + candidateBase := modelName[:idx] + if ModelUsesThinkingLevels(candidateBase) { + baseModel = candidateBase + // Numeric suffix on level-aware models should still surface as reasoning effort metadata. + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + } + matched = true + } else { + baseModel = candidateBase + budgetOverride = &parsed + matched = true + } } else { baseModel = modelName[:idx] if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok { reasoningEffort = &normalized matched = true + } else if !ModelUsesThinkingLevels(baseModel) { + // Keep unknown effort tokens so callers can honor user intent even without normalization. + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + matched = true + } else { + baseModel = modelName + } } else { - baseModel = modelName + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw + matched = true + } else { + baseModel = modelName + } } } } From 007572b58e2e6577f3c9a9a83d946e3b9c757437 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 15:52:14 +0800 Subject: [PATCH 07/11] fix(util): do not strip thinking suffix on registered models NormalizeThinkingModel now checks ModelSupportsThinking before removing "-thinking" or "-thinking-", avoiding accidental parsing of model names where the suffix is part of the official id (e.g., kimi-k2-thinking, qwen3-235b-a22b-thinking-2507). 
The registry adds ThinkingSupport metadata for several models and propagates it via ModelInfo (e.g., kimi-k2-thinking, deepseek-r1, qwen3-235b-a22b-thinking-2507, minimax-m2), enabling accurate detection of thinking-capable models and correcting base model inference. --- internal/registry/model_definitions.go | 10 ++++++---- internal/util/thinking_suffix.go | 19 +++++++++++++++---- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go index 9956d964..adaff867 100644 --- a/internal/registry/model_definitions.go +++ b/internal/registry/model_definitions.go @@ -623,6 +623,7 @@ func GetIFlowModels() []*ModelInfo { DisplayName string Description string Created int64 + Thinking *ThinkingSupport }{ {ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600}, {ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800}, @@ -632,17 +633,17 @@ func GetIFlowModels() []*ModelInfo { {ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400}, {ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400}, {ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000}, - {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200}, + {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, {ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000}, {ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 
1759104000}, {ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200}, - {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200}, + {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, {ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200}, {ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400}, - {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600}, + {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, {ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600}, {ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600}, - {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000}, + {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}}, } models := make([]*ModelInfo, 0, len(entries)) for _, entry := range entries { @@ -654,6 +655,7 @@ func GetIFlowModels() []*ModelInfo { Type: "iflow", DisplayName: entry.DisplayName, Description: entry.Description, + Thinking: entry.Thinking, }) } return models diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index 47ce42f7..ef8302b0 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -52,6 +52,11 @@ func 
NormalizeThinkingModel(modelName string) (string, map[string]any) { matched = true default: if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 { + // Skip stripping if the original model is a registered thinking model. + // This prevents "-thinking-2507" in "qwen3-235b-a22b-thinking-2507" from being parsed. + if ModelSupportsThinking(modelName) { + break + } value := modelName[idx+len("-thinking-"):] if value != "" { if parsed, ok := parseIntPrefix(value); ok { @@ -95,10 +100,16 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { } } } else if strings.HasSuffix(lower, "-thinking") { - baseModel = modelName[:len(modelName)-len("-thinking")] - effort := "medium" - reasoningEffort = &effort - matched = true + candidateBase := modelName[:len(modelName)-len("-thinking")] + // Only strip the suffix if the original model is NOT a registered thinking model. + // This prevents stripping "-thinking" from models like "kimi-k2-thinking" where + // the suffix is part of the model's actual name. 
+ if !ModelSupportsThinking(modelName) { + baseModel = candidateBase + effort := "medium" + reasoningEffort = &effort + matched = true + } } } From f6300c72b790c6017a08ceacc425f9863907493d Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 16:21:50 +0800 Subject: [PATCH 08/11] fix(runtime): validate thinking config in iflow and qwen --- internal/runtime/executor/iflow_executor.go | 14 ++++++++++++-- internal/runtime/executor/qwen_executor.go | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go index a445e47d..d1a69812 100644 --- a/internal/runtime/executor/iflow_executor.go +++ b/internal/runtime/executor/iflow_executor.go @@ -58,9 +58,14 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return resp, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint @@ -144,9 +149,14 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") - if upstreamModel := 
util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return nil, errValidate + } // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour. toolsResult := gjson.GetBytes(body, "tools") if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 { diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go index d25ed5da..2b8d0e50 100644 --- a/internal/runtime/executor/qwen_executor.go +++ b/internal/runtime/executor/qwen_executor.go @@ -52,9 +52,14 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req to := sdktranslator.FromString("openai") body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") - if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return resp, errValidate + } body = applyPayloadConfig(e.cfg, req.Model, body) url := strings.TrimSuffix(baseURL, "/") + "/chat/completions" @@ -127,9 +132,14 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort") - if upstreamModel := 
util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" { + upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata) + if upstreamModel != "" { body, _ = sjson.SetBytes(body, "model", upstreamModel) } + body = normalizeThinkingConfig(body, upstreamModel) + if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil { + return nil, errValidate + } toolsResult := gjson.GetBytes(body, "tools") // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response. // This will have no real consequences. It's just to scare Qwen3. From 21bbceca0ce75e651f9dd0a29a681f2c580c661f Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 16:35:36 +0800 Subject: [PATCH 09/11] docs(runtime): document reasoning effort precedence --- internal/runtime/executor/payload_helpers.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 61486d62..9c45681a 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -46,7 +46,8 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str } // applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path. -// Metadata values take precedence over any existing field when the model supports thinking. +// Metadata values take precedence over any existing field when the model supports thinking, intentionally +// overwriting caller-provided values to honor suffix/default metadata priority. 
func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte { if len(metadata) == 0 { return payload From 6285459c08e9f6f5996374085053892d2d5b91fa Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 17:20:44 +0800 Subject: [PATCH 10/11] fix(runtime): unify claude thinking config resolution --- internal/runtime/executor/claude_executor.go | 56 +++----------------- internal/util/claude_thinking.go | 46 ++++++++++++++++ 2 files changed, 52 insertions(+), 50 deletions(-) create mode 100644 internal/util/claude_thinking.go diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go index c7470954..6af08608 100644 --- a/internal/runtime/executor/claude_executor.go +++ b/internal/runtime/executor/claude_executor.go @@ -450,59 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) { return betas, body } -// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes. +// injectThinkingConfig adds thinking configuration based on metadata using the unified flow. +// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata +// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini. 
func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte { - // Only inject if thinking config is not already present - if gjson.GetBytes(body, "thinking").Exists() { + budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata) + if !ok { return body } - - budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata) - if !ok || budgetTokens <= 0 { - return body - } - - body, _ = sjson.SetBytes(body, "thinking.type", "enabled") - body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens) - return body -} - -func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) { - budget, include, effort, matched := util.ThinkingFromMetadata(metadata) - if matched { - if include != nil && !*include { - return 0, false - } - if budget != nil { - normalized := util.NormalizeThinkingBudget(modelName, *budget) - if normalized > 0 { - return normalized, true - } - return 0, false - } - if effort != nil { - if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 { - return derived, true - } - } - } - return claudeBudgetFromSuffix(modelName) -} - -func claudeBudgetFromSuffix(modelName string) (int, bool) { - lower := strings.ToLower(strings.TrimSpace(modelName)) - switch { - case strings.HasSuffix(lower, "-thinking-low"): - return 1024, true - case strings.HasSuffix(lower, "-thinking-medium"): - return 8192, true - case strings.HasSuffix(lower, "-thinking-high"): - return 24576, true - case strings.HasSuffix(lower, "-thinking"): - return 8192, true - default: - return 0, false - } + return util.ApplyClaudeThinkingConfig(body, budget) } // ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled. 
diff --git a/internal/util/claude_thinking.go b/internal/util/claude_thinking.go new file mode 100644 index 00000000..b0c5a0a2 --- /dev/null +++ b/internal/util/claude_thinking.go @@ -0,0 +1,46 @@ +package util + +import ( + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload. +// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget. +// If budget is nil or the payload already has thinking config, it returns the payload unchanged. +func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte { + if budget == nil { + return body + } + if gjson.GetBytes(body, "thinking").Exists() { + return body + } + if *budget <= 0 { + return body + } + updated := body + updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled") + updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget) + return updated +} + +// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models. +// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget. +// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched. 
+func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) { + budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata) + if !matched { + return nil, false + } + if include != nil && !*include { + return nil, true + } + if budget == nil { + return nil, true + } + normalized := NormalizeThinkingBudget(modelName, *budget) + if normalized <= 0 { + return nil, true + } + return &normalized, true +} From facfe7c518cb528426dcb82c7f927e4f151bea33 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Thu, 11 Dec 2025 18:17:28 +0800 Subject: [PATCH 11/11] refactor(thinking): use bracket tags for thinking meta Align thinking suffix handling on a single bracket-style marker. NormalizeThinkingModel strips a terminal `[value]` segment from model identifiers and turns it into either a thinking budget (for numeric values) or a reasoning effort hint (for strings). Emission of `ThinkingIncludeThoughtsMetadataKey` is removed. Executor helpers and the example config are updated so their comments reference the new `[value]` suffix format instead of the legacy dash variants. BREAKING CHANGE: dash-based thinking suffixes (`-thinking`, `-thinking-N`, `-reasoning`, `-nothinking`) are no longer parsed for thinking metadata; only `[value]` annotations are recognized. --- config.example.yaml | 2 +- internal/runtime/executor/payload_helpers.go | 4 +- internal/util/thinking_suffix.go | 122 ++++++------------- 3 files changed, 41 insertions(+), 87 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index dfd7454b..31f16973 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -100,7 +100,7 @@ ws-auth: false # excluded-models: # - "claude-opus-4-5-20251101" # exclude specific models (exact match) # - "claude-3-*" # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219) -# - "*-think" # wildcard matching suffix (e.g. 
claude-opus-4-5-thinking) +# - "*-thinking" # wildcard matching suffix (e.g. claude-opus-4-5-thinking) # - "*haiku*" # wildcard matching substring (e.g. claude-3-5-haiku-20241022) # OpenAI compatibility providers diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go index 9c45681a..be249868 100644 --- a/internal/runtime/executor/payload_helpers.go +++ b/internal/runtime/executor/payload_helpers.go @@ -11,7 +11,7 @@ import ( "github.com/tidwall/sjson" ) -// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N) +// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., [high], [8192]) // for standard Gemini format payloads. It normalizes the budget when the model supports thinking. func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte { budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) @@ -28,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) } -// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N) +// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., [high], [8192]) // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking. 
func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte { budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata) diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go index ef8302b0..c9a68534 100644 --- a/internal/util/thinking_suffix.go +++ b/internal/util/thinking_suffix.go @@ -14,100 +14,57 @@ const ( ) // NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns -// the normalized base model with extracted metadata. Supported patterns: -// - "-thinking-<number>" extracts a numeric budget -// - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none) -// - "-thinking" maps to a default reasoning effort of "medium" -// - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true -// - "-nothinking" maps to budget=0 and include_thoughts=false +// the normalized base model with extracted metadata. Supported pattern: +// - "[<value>]" where value can be: +// - A numeric budget (e.g., "[8192]", "[16384]") +// - A reasoning effort level (e.g., "[high]", "[medium]", "[low]") +// +// Examples: +// - "claude-sonnet-4-5-20250929[16384]" → budget=16384 +// - "gpt-5.1[high]" → reasoning_effort="high" +// - "gemini-2.5-pro[32768]" → budget=32768 +// +// Note: Empty brackets "[]" are not supported and will be ignored. 
func NormalizeThinkingModel(modelName string) (string, map[string]any) { if modelName == "" { return modelName, nil } - lower := strings.ToLower(modelName) baseModel := modelName var ( budgetOverride *int - includeThoughts *bool reasoningEffort *string matched bool ) - switch { - case strings.HasSuffix(lower, "-nothinking"): - baseModel = modelName[:len(modelName)-len("-nothinking")] - budget := 0 - include := false - budgetOverride = &budget - includeThoughts = &include - matched = true - case strings.HasSuffix(lower, "-reasoning"): - baseModel = modelName[:len(modelName)-len("-reasoning")] - budget := -1 - include := true - budgetOverride = &budget - includeThoughts = &include - matched = true - default: - if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 { - // Skip stripping if the original model is a registered thinking model. - // This prevents "-thinking-2507" in "qwen3-235b-a22b-thinking-2507" from being parsed. - if ModelSupportsThinking(modelName) { - break - } - value := modelName[idx+len("-thinking-"):] - if value != "" { - if parsed, ok := parseIntPrefix(value); ok { - candidateBase := modelName[:idx] - if ModelUsesThinkingLevels(candidateBase) { - baseModel = candidateBase - // Numeric suffix on level-aware models should still surface as reasoning effort metadata. - raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - } - matched = true - } else { - baseModel = candidateBase - budgetOverride = &parsed - matched = true - } - } else { - baseModel = modelName[:idx] - if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok { - reasoningEffort = &normalized - matched = true - } else if !ModelUsesThinkingLevels(baseModel) { - // Keep unknown effort tokens so callers can honor user intent even without normalization. 
- raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - matched = true - } else { - baseModel = modelName - } - } else { - raw := strings.ToLower(strings.TrimSpace(value)) - if raw != "" { - reasoningEffort = &raw - matched = true - } else { - baseModel = modelName - } - } - } - } - } else if strings.HasSuffix(lower, "-thinking") { - candidateBase := modelName[:len(modelName)-len("-thinking")] - // Only strip the suffix if the original model is NOT a registered thinking model. - // This prevents stripping "-thinking" from models like "kimi-k2-thinking" where - // the suffix is part of the model's actual name. - if !ModelSupportsThinking(modelName) { - baseModel = candidateBase - effort := "medium" - reasoningEffort = &effort + // Match "[value]" pattern at the end of the model name + if idx := strings.LastIndex(modelName, "["); idx != -1 { + if !strings.HasSuffix(modelName, "]") { + // Incomplete bracket, ignore + return baseModel, nil + } + + value := modelName[idx+1 : len(modelName)-1] // Extract content between [ and ] + if value == "" { + // Empty brackets not supported + return baseModel, nil + } + + candidateBase := modelName[:idx] + + // Auto-detect: pure numeric → budget, string → reasoning effort level + if parsed, ok := parseIntPrefix(value); ok { + // Numeric value: treat as thinking budget + baseModel = candidateBase + budgetOverride = &parsed + matched = true + } else { + // String value: treat as reasoning effort level + baseModel = candidateBase + raw := strings.ToLower(strings.TrimSpace(value)) + if raw != "" { + reasoningEffort = &raw matched = true } } @@ -123,9 +80,6 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) { if budgetOverride != nil { metadata[ThinkingBudgetMetadataKey] = *budgetOverride } - if includeThoughts != nil { - metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts - } if reasoningEffort != nil { metadata[ReasoningEffortMetadataKey] = *reasoningEffort }