From a03d514095c4f76d7d5bf986bd1e109854e2868f Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 11:28:44 +0800
Subject: [PATCH 01/11] feat(registry): add thinking metadata for models

---
 internal/registry/model_definitions.go | 13 +++++++++++++
 internal/registry/model_registry.go    |  3 +++
 2 files changed, 16 insertions(+)

diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index 2f87f195..9956d964 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.5 Haiku",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			// Thinking: not supported for Haiku models
 		},
 		{
 			ID:                  "claude-sonnet-4-5-20250929",
@@ -49,6 +50,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4.1 Opus",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-opus-4-20250514",
@@ -59,6 +61,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4 Opus",
 			ContextLength:       200000,
 			MaxCompletionTokens: 32000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-sonnet-4-20250514",
@@ -69,6 +72,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 4 Sonnet",
 			ContextLength:       200000,
 			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-3-7-sonnet-20250219",
@@ -79,6 +83,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 3.7 Sonnet",
 			ContextLength:       128000,
 			MaxCompletionTokens: 8192,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID:                  "claude-3-5-haiku-20241022",
@@ -89,6 +94,7 @@ func GetClaudeModels() []*ModelInfo {
 			DisplayName:         "Claude 3.5 Haiku",
 			ContextLength:       128000,
 			MaxCompletionTokens: 8192,
+			// Thinking: not supported for Haiku models
 		},
 	}
 }
@@ -476,6 +482,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5-codex",
@@ -489,6 +496,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5-codex-mini",
@@ -502,6 +510,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5.1",
@@ -515,6 +524,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5.1-codex",
@@ -528,6 +538,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5.1-codex-mini",
@@ -541,6 +552,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
 		},
 		{
 			ID:                  "gpt-5.1-codex-max",
@@ -554,6 +566,7 @@ func GetOpenAIModels() []*ModelInfo {
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
 			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
 		},
 	}
 }
diff --git a/internal/registry/model_registry.go b/internal/registry/model_registry.go
index 5ef9007f..f3517bde 100644
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -63,6 +63,9 @@ type ThinkingSupport struct {
 	ZeroAllowed bool `json:"zero_allowed,omitempty"`
 	// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
 	DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
+	// Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high").
+	// When set, the model uses level-based reasoning instead of token budgets.
+	Levels []string `json:"levels,omitempty"`
 }
 
 // ModelRegistration tracks a model's availability

From 3ffd120ae9e9ce2bf34cc87c9994150ec4474ff6 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 11:51:33 +0800
Subject: [PATCH 02/11] feat(runtime): add thinking config normalization

---
 internal/runtime/executor/codex_executor.go   |  2 +
 .../executor/openai_compat_executor.go        |  8 ++-
 internal/runtime/executor/payload_helpers.go  | 57 +++++++++++++++++++
 internal/util/thinking.go                     | 46 +++++++++++++++
 4 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go
index 46a30177..3fe5ed6e 100644
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -55,6 +55,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body = normalizeThinkingConfig(body, upstreamModel)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
@@ -149,6 +150,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
 	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body = normalizeThinkingConfig(body, upstreamModel)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go
index 93122c20..ba47750e 100644
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -59,9 +59,11 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
 	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
-	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
+	translated = normalizeThinkingConfig(translated, upstreamModel)
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -148,9 +150,11 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
 	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
-	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
+	translated = normalizeThinkingConfig(translated, upstreamModel)
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go
index 37e3141a..9d431f11 100644
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -232,3 +232,60 @@ func matchModelPattern(pattern, model string) bool {
 	}
 	return pi == len(pattern)
 }
+
+// normalizeThinkingConfig normalizes thinking-related fields in the payload
+// based on model capabilities. For models without thinking support, it strips
+// reasoning fields. For models with level-based thinking, it validates and
+// normalizes the reasoning effort level.
+func normalizeThinkingConfig(payload []byte, model string) []byte {
+	if len(payload) == 0 || model == "" {
+		return payload
+	}
+
+	if !util.ModelSupportsThinking(model) {
+		return stripThinkingFields(payload)
+	}
+
+	if util.ModelUsesThinkingLevels(model) {
+		return normalizeReasoningEffortLevel(payload, model)
+	}
+
+	return payload
+}
+
+// stripThinkingFields removes thinking-related fields from the payload for
+// models that do not support thinking.
+func stripThinkingFields(payload []byte) []byte {
+	fieldsToRemove := []string{
+		"reasoning",
+		"reasoning_effort",
+		"reasoning.effort",
+	}
+	out := payload
+	for _, field := range fieldsToRemove {
+		if gjson.GetBytes(out, field).Exists() {
+			out, _ = sjson.DeleteBytes(out, field)
+		}
+	}
+	return out
+}
+
+// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
+// or reasoning.effort field for level-based thinking models.
+func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
+	out := payload
+
+	if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
+		if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
+			out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
+		}
+	}
+
+	if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
+		if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
+			out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
+		}
+	}
+
+	return out
+}
diff --git a/internal/util/thinking.go b/internal/util/thinking.go
index c16b91cd..37200980 100644
--- a/internal/util/thinking.go
+++ b/internal/util/thinking.go
@@ -1,6 +1,8 @@
 package util
 
 import (
+	"strings"
+
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 )
 
@@ -67,3 +69,47 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
 	}
 	return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
 }
+
+// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
+// Returns nil if the model has no thinking support or no levels defined.
+func GetModelThinkingLevels(model string) []string {
+	if model == "" {
+		return nil
+	}
+	info := registry.GetGlobalRegistry().GetModelInfo(model)
+	if info == nil || info.Thinking == nil {
+		return nil
+	}
+	return info.Thinking.Levels
+}
+
+// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
+// effort levels instead of numeric budgets.
+func ModelUsesThinkingLevels(model string) bool {
+	levels := GetModelThinkingLevels(model)
+	return len(levels) > 0
+}
+
+// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
+// level for the given model. If the level is not supported, it returns the
+// first (lowest) level from the model's supported levels.
+func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
+	levels := GetModelThinkingLevels(model)
+	if len(levels) == 0 {
+		return "", false
+	}
+	loweredEffort := strings.ToLower(strings.TrimSpace(effort))
+	for _, lvl := range levels {
+		if strings.ToLower(lvl) == loweredEffort {
+			return lvl, true
+		}
+	}
+	return defaultReasoningLevel(levels), true
+}
+
+func defaultReasoningLevel(levels []string) string {
+	if len(levels) > 0 {
+		return levels[0]
+	}
+	return ""
+}

From d06d0eab2f12af290453c17d8cb24e595792751a Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 12:14:51 +0800
Subject: [PATCH 03/11] fix(util): centralize reasoning effort normalization

---
 internal/util/thinking.go        | 42 ++++++++++++++++++++++++++++++++
 internal/util/thinking_suffix.go | 26 ++++++++------------
 2 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/internal/util/thinking.go b/internal/util/thinking.go
index 37200980..bcf92c5b 100644
--- a/internal/util/thinking.go
+++ b/internal/util/thinking.go
@@ -113,3 +113,45 @@ func defaultReasoningLevel(levels []string) string {
 	}
 	return ""
 }
+
+// standardReasoningEfforts defines the canonical set of reasoning effort levels.
+// This serves as the single source of truth for valid effort values.
+var standardReasoningEfforts = []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}
+
+// IsValidReasoningEffort checks if the given effort string is a valid reasoning effort level.
+// This is a registry-independent check against the standard effort levels.
+func IsValidReasoningEffort(effort string) bool {
+	if effort == "" {
+		return false
+	}
+	lowered := strings.ToLower(strings.TrimSpace(effort))
+	for _, e := range standardReasoningEfforts {
+		if e == lowered {
+			return true
+		}
+	}
+	return false
+}
+
+// NormalizeReasoningEffort normalizes a reasoning effort string to its canonical form.
+// It first tries registry-based normalization if a model is provided, then falls back
+// to the standard effort levels. Returns empty string and false if invalid.
+func NormalizeReasoningEffort(model, effort string) (string, bool) {
+	if effort == "" {
+		return "", false
+	}
+	lowered := strings.ToLower(strings.TrimSpace(effort))
+
+	if model != "" {
+		if normalized, ok := NormalizeReasoningEffortLevel(model, effort); ok {
+			return normalized, true
+		}
+	}
+
+	for _, e := range standardReasoningEfforts {
+		if e == lowered {
+			return e, true
+		}
+	}
+	return "", false
+}
diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go
index e3fd9136..1a1a8715 100644
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -58,8 +58,9 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 					baseModel = modelName[:idx]
 					budgetOverride = &parsed
 					matched = true
-				} else if effort, okEffort := normalizeReasoningEffort(value); okEffort {
+				} else if IsValidReasoningEffort(value) {
 					baseModel = modelName[:idx]
+					effort := strings.ToLower(strings.TrimSpace(value))
 					reasoningEffort = &effort
 					matched = true
 				}
@@ -185,7 +186,9 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
 		return "", false
 	}
 	if effort != nil && *effort != "" {
-		return *effort, true
+		if IsValidReasoningEffort(*effort) {
+			return strings.ToLower(strings.TrimSpace(*effort)), true
+		}
 	}
 	if budget != nil {
 		switch *budget {
@@ -207,7 +210,11 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) {
 	if effort == "" {
 		return 0, false
 	}
-	switch strings.ToLower(effort) {
+	normalized, ok := NormalizeReasoningEffort(model, effort)
+	if !ok {
+		return 0, false
+	}
+	switch normalized {
 	case "none":
 		return 0, true
 	case "auto":
@@ -312,16 +319,3 @@ func parseNumberToInt(raw any) (int, bool) {
 	}
 	return 0, false
 }
-
-func normalizeReasoningEffort(value string) (string, bool) {
-	if value == "" {
-		return "", false
-	}
-	effort := strings.ToLower(strings.TrimSpace(value))
-	switch effort {
-	case "minimal", "low", "medium", "high", "xhigh", "auto", "none":
-		return effort, true
-	default:
-		return "", false
-	}
-}

From 169f4295d041b0c2e1089d02073740c36f83e8bf Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 12:20:12 +0800
Subject: [PATCH 04/11] fix(util): align reasoning effort handling with
 registry

---
 internal/util/thinking.go        | 42 --------------------------------
 internal/util/thinking_suffix.go | 19 ++++++++-------
 2 files changed, 10 insertions(+), 51 deletions(-)

diff --git a/internal/util/thinking.go b/internal/util/thinking.go
index bcf92c5b..37200980 100644
--- a/internal/util/thinking.go
+++ b/internal/util/thinking.go
@@ -113,45 +113,3 @@ func defaultReasoningLevel(levels []string) string {
 	}
 	return ""
 }
-
-// standardReasoningEfforts defines the canonical set of reasoning effort levels.
-// This serves as the single source of truth for valid effort values.
-var standardReasoningEfforts = []string{"none", "auto", "minimal", "low", "medium", "high", "xhigh"}
-
-// IsValidReasoningEffort checks if the given effort string is a valid reasoning effort level.
-// This is a registry-independent check against the standard effort levels.
-func IsValidReasoningEffort(effort string) bool {
-	if effort == "" {
-		return false
-	}
-	lowered := strings.ToLower(strings.TrimSpace(effort))
-	for _, e := range standardReasoningEfforts {
-		if e == lowered {
-			return true
-		}
-	}
-	return false
-}
-
-// NormalizeReasoningEffort normalizes a reasoning effort string to its canonical form.
-// It first tries registry-based normalization if a model is provided, then falls back
-// to the standard effort levels. Returns empty string and false if invalid.
-func NormalizeReasoningEffort(model, effort string) (string, bool) {
-	if effort == "" {
-		return "", false
-	}
-	lowered := strings.ToLower(strings.TrimSpace(effort))
-
-	if model != "" {
-		if normalized, ok := NormalizeReasoningEffortLevel(model, effort); ok {
-			return normalized, true
-		}
-	}
-
-	for _, e := range standardReasoningEfforts {
-		if e == lowered {
-			return e, true
-		}
-	}
-	return "", false
-}
diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go
index 1a1a8715..c2d806ad 100644
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -58,11 +58,14 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 					baseModel = modelName[:idx]
 					budgetOverride = &parsed
 					matched = true
-				} else if IsValidReasoningEffort(value) {
+				} else {
 					baseModel = modelName[:idx]
-					effort := strings.ToLower(strings.TrimSpace(value))
-					reasoningEffort = &effort
-					matched = true
+					if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok {
+						reasoningEffort = &normalized
+						matched = true
+					} else {
+						baseModel = modelName
+					}
 				}
 			}
 		} else if strings.HasSuffix(lower, "-thinking") {
@@ -186,9 +189,7 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
 		return "", false
 	}
 	if effort != nil && *effort != "" {
-		if IsValidReasoningEffort(*effort) {
-			return strings.ToLower(strings.TrimSpace(*effort)), true
-		}
+		return strings.ToLower(strings.TrimSpace(*effort)), true
 	}
 	if budget != nil {
 		switch *budget {
@@ -210,9 +211,9 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) {
 	if effort == "" {
 		return 0, false
 	}
-	normalized, ok := NormalizeReasoningEffort(model, effort)
+	normalized, ok := NormalizeReasoningEffortLevel(model, effort)
 	if !ok {
-		return 0, false
+		normalized = strings.ToLower(strings.TrimSpace(effort))
 	}
 	switch normalized {
 	case "none":

From 519da2e04222641a412fb5c17a0bc2cf20428800 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 12:36:54 +0800
Subject: [PATCH 05/11] fix(runtime): validate reasoning effort levels

---
 internal/runtime/executor/codex_executor.go   |  6 ++++
 .../executor/openai_compat_executor.go        |  6 ++++
 internal/runtime/executor/payload_helpers.go  | 35 +++++++++++++++++++
 internal/util/thinking.go                     | 12 ++-----
 4 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go
index 3fe5ed6e..7003373f 100644
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -56,6 +56,9 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
 	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return resp, errValidate
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.SetBytes(body, "stream", true)
@@ -151,6 +154,9 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 
 	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
 	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return nil, errValidate
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go
index ba47750e..507b0fd9 100644
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -64,6 +64,9 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
 	translated = normalizeThinkingConfig(translated, upstreamModel)
+	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+		return resp, errValidate
+	}
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -155,6 +158,9 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
 	}
 	translated = normalizeThinkingConfig(translated, upstreamModel)
+	if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+		return nil, errValidate
+	}
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go
index 9d431f11..5711bbbd 100644
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -1,6 +1,8 @@
 package executor
 
 import (
+	"fmt"
+	"net/http"
 	"strings"
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -289,3 +291,36 @@ func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
 
 	return out
 }
+
+// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
+// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
+// downgrading requests.
+func validateThinkingConfig(payload []byte, model string) error {
+	if len(payload) == 0 || model == "" {
+		return nil
+	}
+	if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
+		return nil
+	}
+
+	levels := util.GetModelThinkingLevels(model)
+	checkField := func(path string) error {
+		if effort := gjson.GetBytes(payload, path); effort.Exists() {
+			if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
+				return statusErr{
+					code: http.StatusBadRequest,
+					msg:  fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
+				}
+			}
+		}
+		return nil
+	}
+
+	if err := checkField("reasoning_effort"); err != nil {
+		return err
+	}
+	if err := checkField("reasoning.effort"); err != nil {
+		return err
+	}
+	return nil
+}
diff --git a/internal/util/thinking.go b/internal/util/thinking.go
index 37200980..9671f20b 100644
--- a/internal/util/thinking.go
+++ b/internal/util/thinking.go
@@ -91,8 +91,7 @@ func ModelUsesThinkingLevels(model string) bool {
 }
 
 // NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
-// level for the given model. If the level is not supported, it returns the
-// first (lowest) level from the model's supported levels.
+// level for the given model. Returns false when the level is not supported.
 func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
 	levels := GetModelThinkingLevels(model)
 	if len(levels) == 0 {
@@ -104,12 +103,5 @@ func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
 			return lvl, true
 		}
 	}
-	return defaultReasoningLevel(levels), true
-}
-
-func defaultReasoningLevel(levels []string) string {
-	if len(levels) > 0 {
-		return levels[0]
-	}
-	return ""
+	return "", false
 }

From 3a81ab22fdb6c9b993fac1deef94785f8a8f5dbf Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 14:35:05 +0800
Subject: [PATCH 06/11] fix(runtime): unify reasoning effort metadata overrides

---
 internal/runtime/executor/codex_executor.go   |  6 ++--
 internal/runtime/executor/iflow_executor.go   |  4 +--
 .../executor/openai_compat_executor.go        |  4 +--
 internal/runtime/executor/payload_helpers.go  | 30 +++-------------
 internal/runtime/executor/qwen_executor.go    |  4 +--
 internal/util/thinking_suffix.go              | 34 ++++++++++++++++---
 6 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/internal/runtime/executor/codex_executor.go b/internal/runtime/executor/codex_executor.go
index 7003373f..b9470b3c 100644
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -54,7 +54,7 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
 	body = normalizeThinkingConfig(body, upstreamModel)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return resp, errValidate
@@ -152,7 +152,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
 	body = normalizeThinkingConfig(body, upstreamModel)
 	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
 		return nil, errValidate
@@ -254,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 
 	modelForCounting := req.Model
 
-	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
 	body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "stream", false)
diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go
index c68a6431..a445e47d 100644
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -57,7 +57,7 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
 	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
@@ -143,7 +143,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
-	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
 	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
diff --git a/internal/runtime/executor/openai_compat_executor.go b/internal/runtime/executor/openai_compat_executor.go
index 507b0fd9..68b2963a 100644
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -58,7 +58,7 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
+	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
@@ -152,7 +152,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 		translated = e.overrideModel(translated, modelOverride)
 	}
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-	translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
+	translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
 	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
 	if upstreamModel != "" {
 		translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go
index 5711bbbd..61486d62 100644
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -45,40 +45,20 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 }
 
-// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
-// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
-func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
+// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
+// Metadata values take precedence over any existing field when the model supports thinking.
+func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
 	if len(metadata) == 0 {
 		return payload
 	}
 	if !util.ModelSupportsThinking(model) {
 		return payload
 	}
-	if gjson.GetBytes(payload, "reasoning.effort").Exists() {
+	if field == "" {
 		return payload
 	}
 	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
-			return updated
-		}
-	}
-	return payload
-}
-
-// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
-// when present in metadata. It avoids overwriting an existing reasoning_effort field.
-func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
-	if len(metadata) == 0 {
-		return payload
-	}
-	if !util.ModelSupportsThinking(model) {
-		return payload
-	}
-	if gjson.GetBytes(payload, "reasoning_effort").Exists() {
-		return payload
-	}
-	if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-		if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
+		if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
 			return updated
 		}
 	}
diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index f060cb61..d25ed5da 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -51,7 +51,7 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
 	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
@@ -126,7 +126,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
-	body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
+	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
 	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go
index c2d806ad..47ce42f7 100644
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -55,16 +55,42 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 			value := modelName[idx+len("-thinking-"):]
 			if value != "" {
 				if parsed, ok := parseIntPrefix(value); ok {
-					baseModel = modelName[:idx]
-					budgetOverride = &parsed
-					matched = true
+					candidateBase := modelName[:idx]
+					if ModelUsesThinkingLevels(candidateBase) {
+						baseModel = candidateBase
+						// Numeric suffix on level-aware models should still surface as reasoning effort metadata.
+						raw := strings.ToLower(strings.TrimSpace(value))
+						if raw != "" {
+							reasoningEffort = &raw
+						}
+						matched = true
+					} else {
+						baseModel = candidateBase
+						budgetOverride = &parsed
+						matched = true
+					}
 				} else {
 					baseModel = modelName[:idx]
 					if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok {
 						reasoningEffort = &normalized
 						matched = true
+					} else if !ModelUsesThinkingLevels(baseModel) {
+						// Keep unknown effort tokens so callers can honor user intent even without normalization.
+						raw := strings.ToLower(strings.TrimSpace(value))
+						if raw != "" {
+							reasoningEffort = &raw
+							matched = true
+						} else {
+							baseModel = modelName
+						}
 					} else {
-						baseModel = modelName
+						raw := strings.ToLower(strings.TrimSpace(value))
+						if raw != "" {
+							reasoningEffort = &raw
+							matched = true
+						} else {
+							baseModel = modelName
+						}
 					}
 				}
 			}

From 007572b58e2e6577f3c9a9a83d946e3b9c757437 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 15:52:14 +0800
Subject: [PATCH 07/11] fix(util): do not strip thinking suffix on registered
 models

NormalizeThinkingModel now checks ModelSupportsThinking before removing
"-thinking" or "-thinking-<ver>", avoiding accidental parsing of model
names where the suffix is part of the official id (e.g., kimi-k2-thinking,
qwen3-235b-a22b-thinking-2507).

The registry adds ThinkingSupport metadata for several models and
propagates it via ModelInfo (e.g., kimi-k2-thinking, deepseek-r1,
qwen3-235b-a22b-thinking-2507, minimax-m2), enabling accurate detection
of thinking-capable models and correcting base model inference.
---
 internal/registry/model_definitions.go | 10 ++++++----
 internal/util/thinking_suffix.go       | 19 +++++++++++++++----
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index 9956d964..adaff867 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -623,6 +623,7 @@ func GetIFlowModels() []*ModelInfo {
 		DisplayName string
 		Description string
 		Created     int64
+		Thinking    *ThinkingSupport
 	}{
 		{ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
 		{ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
@@ -632,17 +633,17 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
 		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
 		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
-		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200},
+		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
 		{ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
 		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
 		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
-		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
+		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
 		{ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
 		{ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
-		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
+		{ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
 		{ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
 		{ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
-		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
+		{ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
 	}
 	models := make([]*ModelInfo, 0, len(entries))
 	for _, entry := range entries {
@@ -654,6 +655,7 @@ func GetIFlowModels() []*ModelInfo {
 			Type:        "iflow",
 			DisplayName: entry.DisplayName,
 			Description: entry.Description,
+			Thinking:    entry.Thinking,
 		})
 	}
 	return models
diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go
index 47ce42f7..ef8302b0 100644
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -52,6 +52,11 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 		matched = true
 	default:
 		if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
+			// Skip stripping if the original model is a registered thinking model.
+			// This prevents "-thinking-2507" in "qwen3-235b-a22b-thinking-2507" from being parsed.
+			if ModelSupportsThinking(modelName) {
+				break
+			}
 			value := modelName[idx+len("-thinking-"):]
 			if value != "" {
 				if parsed, ok := parseIntPrefix(value); ok {
@@ -95,10 +100,16 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 				}
 			}
 		} else if strings.HasSuffix(lower, "-thinking") {
-			baseModel = modelName[:len(modelName)-len("-thinking")]
-			effort := "medium"
-			reasoningEffort = &effort
-			matched = true
+			candidateBase := modelName[:len(modelName)-len("-thinking")]
+			// Only strip the suffix if the original model is NOT a registered thinking model.
+			// This prevents stripping "-thinking" from models like "kimi-k2-thinking" where
+			// the suffix is part of the model's actual name.
+			if !ModelSupportsThinking(modelName) {
+				baseModel = candidateBase
+				effort := "medium"
+				reasoningEffort = &effort
+				matched = true
+			}
 		}
 	}
 

From f6300c72b790c6017a08ceacc425f9863907493d Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 16:21:50 +0800
Subject: [PATCH 08/11] fix(runtime): validate thinking config in iflow and
 qwen

---
 internal/runtime/executor/iflow_executor.go | 14 ++++++++++++--
 internal/runtime/executor/qwen_executor.go  | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/internal/runtime/executor/iflow_executor.go b/internal/runtime/executor/iflow_executor.go
index a445e47d..d1a69812 100644
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -58,9 +58,14 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
-	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return resp, errValidate
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 
 	endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -144,9 +149,14 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
 	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
-	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return nil, errValidate
+	}
 	// Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
 	toolsResult := gjson.GetBytes(body, "tools")
 	if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
diff --git a/internal/runtime/executor/qwen_executor.go b/internal/runtime/executor/qwen_executor.go
index d25ed5da..2b8d0e50 100644
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -52,9 +52,14 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
 	to := sdktranslator.FromString("openai")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
-	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return resp, errValidate
+	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 
 	url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -127,9 +132,14 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
 	body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
-	if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+	upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+	if upstreamModel != "" {
 		body, _ = sjson.SetBytes(body, "model", upstreamModel)
 	}
+	body = normalizeThinkingConfig(body, upstreamModel)
+	if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+		return nil, errValidate
+	}
 	toolsResult := gjson.GetBytes(body, "tools")
 	// I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
 	// This will have no real consequences. It's just to scare Qwen3.

From 21bbceca0ce75e651f9dd0a29a681f2c580c661f Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 16:35:36 +0800
Subject: [PATCH 09/11] docs(runtime): document reasoning effort precedence

---
 internal/runtime/executor/payload_helpers.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go
index 61486d62..9c45681a 100644
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -46,7 +46,8 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
 }
 
 // applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
-// Metadata values take precedence over any existing field when the model supports thinking.
+// Metadata values take precedence over any existing field when the model supports thinking, intentionally
+// overwriting caller-provided values to honor suffix/default metadata priority.
 func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
 	if len(metadata) == 0 {
 		return payload

From 6285459c08e9f6f5996374085053892d2d5b91fa Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 17:20:44 +0800
Subject: [PATCH 10/11] fix(runtime): unify claude thinking config resolution

---
 internal/runtime/executor/claude_executor.go | 56 +++-----------------
 internal/util/claude_thinking.go             | 46 ++++++++++++++++
 2 files changed, 52 insertions(+), 50 deletions(-)
 create mode 100644 internal/util/claude_thinking.go

diff --git a/internal/runtime/executor/claude_executor.go b/internal/runtime/executor/claude_executor.go
index c7470954..6af08608 100644
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -450,59 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
 	return betas, body
 }
 
-// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes.
+// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
+// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
+// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
 func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
-	// Only inject if thinking config is not already present
-	if gjson.GetBytes(body, "thinking").Exists() {
+	budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
+	if !ok {
 		return body
 	}
-
-	budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata)
-	if !ok || budgetTokens <= 0 {
-		return body
-	}
-
-	body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
-	body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens)
-	return body
-}
-
-func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) {
-	budget, include, effort, matched := util.ThinkingFromMetadata(metadata)
-	if matched {
-		if include != nil && !*include {
-			return 0, false
-		}
-		if budget != nil {
-			normalized := util.NormalizeThinkingBudget(modelName, *budget)
-			if normalized > 0 {
-				return normalized, true
-			}
-			return 0, false
-		}
-		if effort != nil {
-			if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 {
-				return derived, true
-			}
-		}
-	}
-	return claudeBudgetFromSuffix(modelName)
-}
-
-func claudeBudgetFromSuffix(modelName string) (int, bool) {
-	lower := strings.ToLower(strings.TrimSpace(modelName))
-	switch {
-	case strings.HasSuffix(lower, "-thinking-low"):
-		return 1024, true
-	case strings.HasSuffix(lower, "-thinking-medium"):
-		return 8192, true
-	case strings.HasSuffix(lower, "-thinking-high"):
-		return 24576, true
-	case strings.HasSuffix(lower, "-thinking"):
-		return 8192, true
-	default:
-		return 0, false
-	}
+	return util.ApplyClaudeThinkingConfig(body, budget)
 }
 
 // ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
diff --git a/internal/util/claude_thinking.go b/internal/util/claude_thinking.go
new file mode 100644
index 00000000..b0c5a0a2
--- /dev/null
+++ b/internal/util/claude_thinking.go
@@ -0,0 +1,46 @@
+package util
+
+import (
+	"github.com/tidwall/gjson"
+	"github.com/tidwall/sjson"
+)
+
+// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload.
+// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
+// If budget is nil or non-positive, or the payload already has thinking config, it returns the payload unchanged.
+func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
+	if budget == nil {
+		return body
+	}
+	if gjson.GetBytes(body, "thinking").Exists() {
+		return body
+	}
+	if *budget <= 0 {
+		return body
+	}
+	updated := body
+	updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
+	updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
+	return updated
+}
+
+// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
+// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
+// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
+func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
+	budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
+	if !matched {
+		return nil, false
+	}
+	if include != nil && !*include {
+		return nil, true
+	}
+	if budget == nil {
+		return nil, true
+	}
+	normalized := NormalizeThinkingBudget(modelName, *budget)
+	if normalized <= 0 {
+		return nil, true
+	}
+	return &normalized, true
+}

From facfe7c518cb528426dcb82c7f927e4f151bea33 Mon Sep 17 00:00:00 2001
From: hkfires <10558748+hkfires@users.noreply.github.com>
Date: Thu, 11 Dec 2025 18:17:28 +0800
Subject: [PATCH 11/11] refactor(thinking): use bracket tags for thinking meta

Align thinking suffix handling on a single bracket-style marker.

NormalizeThinkingModel strips a terminal `[value]` segment from
model identifiers and turns it into either a thinking budget (for
numeric values) or a reasoning effort hint (for strings). Emission
of `ThinkingIncludeThoughtsMetadataKey` is removed.

Executor helper comments now reference the new `[value]` suffix format
instead of the legacy dash variants, and the example config's suffix
wildcard is corrected from `*-think` to `*-thinking` so that it matches
the example model name beside it.

BREAKING CHANGE: dash-based thinking suffixes (`-thinking`,
`-thinking-N`, `-reasoning`, `-nothinking`) are no longer parsed
for thinking metadata; only `[value]` annotations are recognized.
---
 config.example.yaml                          |   2 +-
 internal/runtime/executor/payload_helpers.go |   4 +-
 internal/util/thinking_suffix.go             | 122 ++++++-------------
 3 files changed, 41 insertions(+), 87 deletions(-)

diff --git a/config.example.yaml b/config.example.yaml
index dfd7454b..31f16973 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -100,7 +100,7 @@ ws-auth: false
 #     excluded-models:
 #       - "claude-opus-4-5-20251101" # exclude specific models (exact match)
 #       - "claude-3-*"               # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
-#       - "*-think"                  # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
+#       - "*-thinking"               # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
 #       - "*haiku*"                  # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
 
 # OpenAI compatibility providers
diff --git a/internal/runtime/executor/payload_helpers.go b/internal/runtime/executor/payload_helpers.go
index 9c45681a..be249868 100644
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -11,7 +11,7 @@ import (
 	"github.com/tidwall/sjson"
 )
 
-// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
+// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., [high], [8192])
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
 func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
@@ -28,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
 	return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
 }
 
-// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
+// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., [high], [8192])
 // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
 func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
diff --git a/internal/util/thinking_suffix.go b/internal/util/thinking_suffix.go
index ef8302b0..c9a68534 100644
--- a/internal/util/thinking_suffix.go
+++ b/internal/util/thinking_suffix.go
@@ -14,100 +14,57 @@ const (
 )
 
 // NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
-// the normalized base model with extracted metadata. Supported patterns:
-//   - "-thinking-<number>" extracts a numeric budget
-//   - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
-//   - "-thinking" maps to a default reasoning effort of "medium"
-//   - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
-//   - "-nothinking" maps to budget=0 and include_thoughts=false
+// the normalized base model with extracted metadata. Supported pattern:
+//   - "[<value>]" where value can be:
+//   - A numeric budget (e.g., "[8192]", "[16384]")
+//   - A reasoning effort level (e.g., "[high]", "[medium]", "[low]")
+//
+// Examples:
+//   - "claude-sonnet-4-5-20250929[16384]" → budget=16384
+//   - "gpt-5.1[high]" → reasoning_effort="high"
+//   - "gemini-2.5-pro[32768]" → budget=32768
+//
+// Note: Empty brackets "[]" are not supported and will be ignored.
 func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 	if modelName == "" {
 		return modelName, nil
 	}
 
-	lower := strings.ToLower(modelName)
 	baseModel := modelName
 
 	var (
 		budgetOverride  *int
-		includeThoughts *bool
 		reasoningEffort *string
 		matched         bool
 	)
 
-	switch {
-	case strings.HasSuffix(lower, "-nothinking"):
-		baseModel = modelName[:len(modelName)-len("-nothinking")]
-		budget := 0
-		include := false
-		budgetOverride = &budget
-		includeThoughts = &include
-		matched = true
-	case strings.HasSuffix(lower, "-reasoning"):
-		baseModel = modelName[:len(modelName)-len("-reasoning")]
-		budget := -1
-		include := true
-		budgetOverride = &budget
-		includeThoughts = &include
-		matched = true
-	default:
-		if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
-			// Skip stripping if the original model is a registered thinking model.
-			// This prevents "-thinking-2507" in "qwen3-235b-a22b-thinking-2507" from being parsed.
-			if ModelSupportsThinking(modelName) {
-				break
-			}
-			value := modelName[idx+len("-thinking-"):]
-			if value != "" {
-				if parsed, ok := parseIntPrefix(value); ok {
-					candidateBase := modelName[:idx]
-					if ModelUsesThinkingLevels(candidateBase) {
-						baseModel = candidateBase
-						// Numeric suffix on level-aware models should still surface as reasoning effort metadata.
-						raw := strings.ToLower(strings.TrimSpace(value))
-						if raw != "" {
-							reasoningEffort = &raw
-						}
-						matched = true
-					} else {
-						baseModel = candidateBase
-						budgetOverride = &parsed
-						matched = true
-					}
-				} else {
-					baseModel = modelName[:idx]
-					if normalized, ok := NormalizeReasoningEffortLevel(baseModel, value); ok {
-						reasoningEffort = &normalized
-						matched = true
-					} else if !ModelUsesThinkingLevels(baseModel) {
-						// Keep unknown effort tokens so callers can honor user intent even without normalization.
-						raw := strings.ToLower(strings.TrimSpace(value))
-						if raw != "" {
-							reasoningEffort = &raw
-							matched = true
-						} else {
-							baseModel = modelName
-						}
-					} else {
-						raw := strings.ToLower(strings.TrimSpace(value))
-						if raw != "" {
-							reasoningEffort = &raw
-							matched = true
-						} else {
-							baseModel = modelName
-						}
-					}
-				}
-			}
-		} else if strings.HasSuffix(lower, "-thinking") {
-			candidateBase := modelName[:len(modelName)-len("-thinking")]
-			// Only strip the suffix if the original model is NOT a registered thinking model.
-			// This prevents stripping "-thinking" from models like "kimi-k2-thinking" where
-			// the suffix is part of the model's actual name.
-			if !ModelSupportsThinking(modelName) {
-				baseModel = candidateBase
-				effort := "medium"
-				reasoningEffort = &effort
+	// Match "[value]" pattern at the end of the model name
+	if idx := strings.LastIndex(modelName, "["); idx != -1 {
+		if !strings.HasSuffix(modelName, "]") {
+			// Incomplete bracket, ignore
+			return baseModel, nil
+		}
+
+		value := modelName[idx+1 : len(modelName)-1] // Extract content between [ and ]
+		if value == "" {
+			// Empty brackets not supported
+			return baseModel, nil
+		}
+
+		candidateBase := modelName[:idx]
+
+		// Auto-detect: pure numeric → budget, string → reasoning effort level
+		if parsed, ok := parseIntPrefix(value); ok {
+			// Numeric value: treat as thinking budget
+			baseModel = candidateBase
+			budgetOverride = &parsed
+			matched = true
+		} else {
+			// String value: treat as reasoning effort level
+			baseModel = candidateBase
+			raw := strings.ToLower(strings.TrimSpace(value))
+			if raw != "" {
+				reasoningEffort = &raw
 				matched = true
 			}
 		}
@@ -123,9 +80,6 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
 	if budgetOverride != nil {
 		metadata[ThinkingBudgetMetadataKey] = *budgetOverride
 	}
-	if includeThoughts != nil {
-		metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts
-	}
 	if reasoningEffort != nil {
 		metadata[ReasoningEffortMetadataKey] = *reasoningEffort
 	}