@@ -100,7 +100,7 @@ ws-auth: false
 # excluded-models:
 #   - "claude-opus-4-5-20251101" # exclude specific models (exact match)
 #   - "claude-3-*"               # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
-#   - "*-think"                  # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
+#   - "*-thinking"               # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
 #   - "*haiku*"                  # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
 
 # OpenAI compatibility providers
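
For reference, the wildcard semantics documented above (exact, prefix, suffix, substring) can be read as in the sketch below. This is an illustration only, not the project's matchModelPattern implementation:

package main

import (
    "fmt"
    "strings"
)

// Illustrative reading of the excluded-models wildcards documented above.
func matches(pattern, model string) bool {
    switch {
    case strings.HasPrefix(pattern, "*") && strings.HasSuffix(pattern, "*"):
        return strings.Contains(model, strings.Trim(pattern, "*")) // "*haiku*"
    case strings.HasPrefix(pattern, "*"):
        return strings.HasSuffix(model, strings.TrimPrefix(pattern, "*")) // "*-thinking"
    case strings.HasSuffix(pattern, "*"):
        return strings.HasPrefix(model, strings.TrimSuffix(pattern, "*")) // "claude-3-*"
    default:
        return pattern == model // exact match
    }
}

func main() {
    fmt.Println(matches("claude-3-*", "claude-3-7-sonnet-20250219")) // true
    fmt.Println(matches("*haiku*", "claude-3-5-haiku-20241022"))     // true
    fmt.Println(matches("*-thinking", "claude-opus-4-5-thinking"))   // true
}
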
@@ -16,6 +16,7 @@ func GetClaudeModels() []*ModelInfo {
             DisplayName: "Claude 4.5 Haiku",
             ContextLength: 200000,
             MaxCompletionTokens: 64000,
+            // Thinking: not supported for Haiku models
         },
         {
             ID: "claude-sonnet-4-5-20250929",
@@ -49,6 +50,7 @@ func GetClaudeModels() []*ModelInfo {
             DisplayName: "Claude 4.1 Opus",
             ContextLength: 200000,
             MaxCompletionTokens: 32000,
+            Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
         },
         {
             ID: "claude-opus-4-20250514",
@@ -59,6 +61,7 @@ func GetClaudeModels() []*ModelInfo {
             DisplayName: "Claude 4 Opus",
             ContextLength: 200000,
             MaxCompletionTokens: 32000,
+            Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
         },
         {
             ID: "claude-sonnet-4-20250514",
@@ -69,6 +72,7 @@ func GetClaudeModels() []*ModelInfo {
             DisplayName: "Claude 4 Sonnet",
             ContextLength: 200000,
             MaxCompletionTokens: 64000,
+            Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
         },
         {
             ID: "claude-3-7-sonnet-20250219",
@@ -79,6 +83,7 @@ func GetClaudeModels() []*ModelInfo {
             DisplayName: "Claude 3.7 Sonnet",
             ContextLength: 128000,
             MaxCompletionTokens: 8192,
+            Thinking: &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
         },
         {
             ID: "claude-3-5-haiku-20241022",
@@ -89,6 +94,7 @@ func GetClaudeModels() []*ModelInfo {
             DisplayName: "Claude 3.5 Haiku",
             ContextLength: 128000,
             MaxCompletionTokens: 8192,
+            // Thinking: not supported for Haiku models
         },
     }
 }
@@ -476,6 +482,7 @@ func GetOpenAIModels() []*ModelInfo {
             ContextLength: 400000,
             MaxCompletionTokens: 128000,
             SupportedParameters: []string{"tools"},
+            Thinking: &ThinkingSupport{Levels: []string{"minimal", "low", "medium", "high"}},
         },
         {
             ID: "gpt-5-codex",
@@ -489,6 +496,7 @@ func GetOpenAIModels() []*ModelInfo {
             ContextLength: 400000,
             MaxCompletionTokens: 128000,
             SupportedParameters: []string{"tools"},
+            Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
         },
         {
             ID: "gpt-5-codex-mini",
@@ -502,6 +510,7 @@ func GetOpenAIModels() []*ModelInfo {
             ContextLength: 400000,
             MaxCompletionTokens: 128000,
             SupportedParameters: []string{"tools"},
+            Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
         },
         {
             ID: "gpt-5.1",
@@ -515,6 +524,7 @@ func GetOpenAIModels() []*ModelInfo {
             ContextLength: 400000,
             MaxCompletionTokens: 128000,
             SupportedParameters: []string{"tools"},
+            Thinking: &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}},
         },
         {
             ID: "gpt-5.1-codex",
@@ -528,6 +538,7 @@ func GetOpenAIModels() []*ModelInfo {
             ContextLength: 400000,
             MaxCompletionTokens: 128000,
             SupportedParameters: []string{"tools"},
+            Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
         },
         {
             ID: "gpt-5.1-codex-mini",
@@ -541,6 +552,7 @@ func GetOpenAIModels() []*ModelInfo {
             ContextLength: 400000,
             MaxCompletionTokens: 128000,
             SupportedParameters: []string{"tools"},
+            Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}},
         },
         {
             ID: "gpt-5.1-codex-max",
@@ -554,6 +566,7 @@ func GetOpenAIModels() []*ModelInfo {
             ContextLength: 400000,
             MaxCompletionTokens: 128000,
             SupportedParameters: []string{"tools"},
+            Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
         },
     }
 }
@@ -610,6 +623,7 @@ func GetIFlowModels() []*ModelInfo {
         DisplayName string
         Description string
         Created int64
+        Thinking *ThinkingSupport
     }{
         {ID: "tstars2.0", DisplayName: "TStars-2.0", Description: "iFlow TStars-2.0 multimodal assistant", Created: 1746489600},
         {ID: "qwen3-coder-plus", DisplayName: "Qwen3-Coder-Plus", Description: "Qwen3 Coder Plus code generation", Created: 1753228800},
@@ -619,17 +633,17 @@ func GetIFlowModels() []*ModelInfo {
         {ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905", Created: 1757030400},
         {ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model", Created: 1759190400},
         {ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model", Created: 1752192000},
-        {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 general model", Created: 1762387200},
+        {ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model", Created: 1762387200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
         {ID: "deepseek-v3.2-chat", DisplayName: "DeepSeek-V3.2", Description: "DeepSeek V3.2", Created: 1764576000},
         {ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental", Created: 1759104000},
         {ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus", Created: 1756339200},
-        {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200},
+        {ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1", Created: 1737331200, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
         {ID: "deepseek-v3", DisplayName: "DeepSeek-V3-671B", Description: "DeepSeek V3 671B", Created: 1734307200},
         {ID: "qwen3-32b", DisplayName: "Qwen3-32B", Description: "Qwen3 32B", Created: 1747094400},
-        {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600},
+        {ID: "qwen3-235b-a22b-thinking-2507", DisplayName: "Qwen3-235B-A22B-Thinking", Description: "Qwen3 235B A22B Thinking (2507)", Created: 1753401600, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
         {ID: "qwen3-235b-a22b-instruct", DisplayName: "Qwen3-235B-A22B-Instruct", Description: "Qwen3 235B A22B Instruct", Created: 1753401600},
         {ID: "qwen3-235b", DisplayName: "Qwen3-235B-A22B", Description: "Qwen3 235B A22B", Created: 1753401600},
-        {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000},
+        {ID: "minimax-m2", DisplayName: "MiniMax-M2", Description: "MiniMax M2", Created: 1758672000, Thinking: &ThinkingSupport{Levels: []string{"low", "medium", "high"}}},
     }
     models := make([]*ModelInfo, 0, len(entries))
     for _, entry := range entries {
@@ -641,6 +655,7 @@ func GetIFlowModels() []*ModelInfo {
             Type: "iflow",
             DisplayName: entry.DisplayName,
             Description: entry.Description,
+            Thinking: entry.Thinking,
         })
     }
     return models
@@ -63,6 +63,9 @@ type ThinkingSupport struct {
     ZeroAllowed bool `json:"zero_allowed,omitempty"`
     // DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
     DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
+    // Levels defines discrete reasoning effort levels (e.g., "low", "medium", "high").
+    // When set, the model uses level-based reasoning instead of token budgets.
+    Levels []string `json:"levels,omitempty"`
 }
 
 // ModelRegistration tracks a model's availability
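
For orientation, ThinkingSupport now describes two flavours of reasoning control: budget-ranged (the Claude entries) and level-based (the GPT-5.x and several iFlow entries). A minimal, self-contained sketch using a trimmed-down copy of the struct (illustration only):

package main

import "fmt"

// Trimmed-down copy of the registry's ThinkingSupport, for illustration only.
type ThinkingSupport struct {
    Min, Max       int
    ZeroAllowed    bool
    DynamicAllowed bool
    Levels         []string
}

func main() {
    // Budget-based models (Claude): numeric thinking budgets within a range.
    claude := &ThinkingSupport{Min: 1024, Max: 100000, DynamicAllowed: true}
    // Level-based models (GPT-5.x, some iFlow models): discrete effort levels.
    gpt51 := &ThinkingSupport{Levels: []string{"none", "low", "medium", "high"}}
    fmt.Println(len(gpt51.Levels) > 0, claude.Max) // level-based vs budget-based
}
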
@@ -450,59 +450,15 @@ func extractAndRemoveBetas(body []byte) ([]string, []byte) {
     return betas, body
 }
 
-// injectThinkingConfig adds thinking configuration based on metadata or legacy suffixes.
+// injectThinkingConfig adds thinking configuration based on metadata using the unified flow.
+// It uses util.ResolveClaudeThinkingConfig which internally calls ResolveThinkingConfigFromMetadata
+// and NormalizeThinkingBudget, ensuring consistency with other executors like Gemini.
 func (e *ClaudeExecutor) injectThinkingConfig(modelName string, metadata map[string]any, body []byte) []byte {
-    // Only inject if thinking config is not already present
-    if gjson.GetBytes(body, "thinking").Exists() {
+    budget, ok := util.ResolveClaudeThinkingConfig(modelName, metadata)
+    if !ok {
         return body
     }
+    return util.ApplyClaudeThinkingConfig(body, budget)
-    budgetTokens, ok := resolveClaudeThinkingBudget(modelName, metadata)
-    if !ok || budgetTokens <= 0 {
-        return body
-    }
-
-    body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
-    body, _ = sjson.SetBytes(body, "thinking.budget_tokens", budgetTokens)
-    return body
-}
-
-func resolveClaudeThinkingBudget(modelName string, metadata map[string]any) (int, bool) {
-    budget, include, effort, matched := util.ThinkingFromMetadata(metadata)
-    if matched {
-        if include != nil && !*include {
-            return 0, false
-        }
-        if budget != nil {
-            normalized := util.NormalizeThinkingBudget(modelName, *budget)
-            if normalized > 0 {
-                return normalized, true
-            }
-            return 0, false
-        }
-        if effort != nil {
-            if derived, ok := util.ThinkingEffortToBudget(modelName, *effort); ok && derived > 0 {
-                return derived, true
-            }
-        }
-    }
-    return claudeBudgetFromSuffix(modelName)
-}
-
-func claudeBudgetFromSuffix(modelName string) (int, bool) {
-    lower := strings.ToLower(strings.TrimSpace(modelName))
-    switch {
-    case strings.HasSuffix(lower, "-thinking-low"):
-        return 1024, true
-    case strings.HasSuffix(lower, "-thinking-medium"):
-        return 8192, true
-    case strings.HasSuffix(lower, "-thinking-high"):
-        return 24576, true
-    case strings.HasSuffix(lower, "-thinking"):
-        return 8192, true
-    default:
-        return 0, false
-    }
 }
 
 // ensureMaxTokensForThinking ensures max_tokens > thinking.budget_tokens when thinking is enabled.
@@ -54,7 +54,11 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
     from := opts.SourceFormat
     to := sdktranslator.FromString("codex")
     body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+    body = normalizeThinkingConfig(body, upstreamModel)
+    if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+        return resp, errValidate
+    }
     body = applyPayloadConfig(e.cfg, req.Model, body)
     body, _ = sjson.SetBytes(body, "model", upstreamModel)
     body, _ = sjson.SetBytes(body, "stream", true)
@@ -148,7 +152,11 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
     to := sdktranslator.FromString("codex")
     body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
-    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
+    body = normalizeThinkingConfig(body, upstreamModel)
+    if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+        return nil, errValidate
+    }
     body = applyPayloadConfig(e.cfg, req.Model, body)
     body, _ = sjson.DeleteBytes(body, "previous_response_id")
     body, _ = sjson.SetBytes(body, "model", upstreamModel)
@@ -246,7 +254,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 
     modelForCounting := req.Model
 
-    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model)
+    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning.effort")
     body, _ = sjson.SetBytes(body, "model", upstreamModel)
     body, _ = sjson.DeleteBytes(body, "previous_response_id")
     body, _ = sjson.SetBytes(body, "stream", false)
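
The new field argument lets the same helper target either payload shape: Codex/Responses bodies take the nested reasoning.effort path, while chat-completions bodies take the flat reasoning_effort key. A small runnable sketch of the two resulting JSON layouts (illustrative values):

package main

import (
    "fmt"

    "github.com/tidwall/sjson"
)

func main() {
    payload := []byte(`{"model":"gpt-5.1"}`)

    // Codex/Responses-style payloads use a nested path...
    responses, _ := sjson.SetBytes(payload, "reasoning.effort", "high")
    // ...while chat-completions-style payloads use a flat field.
    chat, _ := sjson.SetBytes(payload, "reasoning_effort", "high")

    fmt.Println(string(responses)) // {"model":"gpt-5.1","reasoning":{"effort":"high"}}
    fmt.Println(string(chat))      // {"model":"gpt-5.1","reasoning_effort":"high"}
}
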
@@ -57,10 +57,15 @@ func (e *IFlowExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
     from := opts.SourceFormat
     to := sdktranslator.FromString("openai")
     body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-    body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
-    if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+    upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+    if upstreamModel != "" {
         body, _ = sjson.SetBytes(body, "model", upstreamModel)
     }
+    body = normalizeThinkingConfig(body, upstreamModel)
+    if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+        return resp, errValidate
+    }
     body = applyPayloadConfig(e.cfg, req.Model, body)
 
     endpoint := strings.TrimSuffix(baseURL, "/") + iflowDefaultEndpoint
@@ -143,10 +148,15 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
     to := sdktranslator.FromString("openai")
     body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
-    body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
-    if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+    upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+    if upstreamModel != "" {
         body, _ = sjson.SetBytes(body, "model", upstreamModel)
     }
+    body = normalizeThinkingConfig(body, upstreamModel)
+    if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+        return nil, errValidate
+    }
     // Ensure tools array exists to avoid provider quirks similar to Qwen's behaviour.
     toolsResult := gjson.GetBytes(body, "tools")
     if toolsResult.Exists() && toolsResult.IsArray() && len(toolsResult.Array()) == 0 {
@@ -58,10 +58,15 @@ func (e *OpenAICompatExecutor) Execute(ctx context.Context, auth *cliproxyauth.A
         translated = e.overrideModel(translated, modelOverride)
     }
     translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-    translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
-    if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+    translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+    upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+    if upstreamModel != "" {
         translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
     }
+    translated = normalizeThinkingConfig(translated, upstreamModel)
+    if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+        return resp, errValidate
+    }
 
     url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
     httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -147,10 +152,15 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
         translated = e.overrideModel(translated, modelOverride)
     }
     translated = applyPayloadConfigWithRoot(e.cfg, req.Model, to.String(), "", translated)
-    translated = applyReasoningEffortMetadataChatCompletions(translated, req.Metadata, req.Model)
-    if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+    translated = applyReasoningEffortMetadata(translated, req.Metadata, req.Model, "reasoning_effort")
+    upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+    if upstreamModel != "" {
         translated, _ = sjson.SetBytes(translated, "model", upstreamModel)
     }
+    translated = normalizeThinkingConfig(translated, upstreamModel)
+    if errValidate := validateThinkingConfig(translated, upstreamModel); errValidate != nil {
+        return nil, errValidate
+    }
 
     url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
     httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(translated))
@@ -1,6 +1,8 @@
 package executor
 
 import (
+    "fmt"
+    "net/http"
     "strings"
 
     "github.com/router-for-me/CLIProxyAPI/v6/internal/config"
@@ -9,7 +11,7 @@ import (
     "github.com/tidwall/sjson"
 )
 
-// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
+// applyThinkingMetadata applies thinking config from model suffix metadata (e.g., [high], [8192])
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
 func applyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
     budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
@@ -26,7 +28,7 @@ func applyThinkingMetadata(payload []byte, metadata map[string]any, model string
     return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
 }
 
-// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., -reasoning, -thinking-N)
+// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., [high], [8192])
 // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
 func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
     budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
@@ -43,40 +45,21 @@ func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model str
     return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 }
 
-// applyReasoningEffortMetadata applies reasoning effort overrides (reasoning.effort) when present in metadata.
-// It avoids overwriting an existing reasoning.effort field and only applies to models that support thinking.
-func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model string) []byte {
+// applyReasoningEffortMetadata applies reasoning effort overrides from metadata to the given JSON path.
+// Metadata values take precedence over any existing field when the model supports thinking, intentionally
+// overwriting caller-provided values to honor suffix/default metadata priority.
+func applyReasoningEffortMetadata(payload []byte, metadata map[string]any, model, field string) []byte {
     if len(metadata) == 0 {
         return payload
     }
     if !util.ModelSupportsThinking(model) {
         return payload
     }
-    if gjson.GetBytes(payload, "reasoning.effort").Exists() {
+    if field == "" {
         return payload
     }
     if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-        if updated, err := sjson.SetBytes(payload, "reasoning.effort", effort); err == nil {
-            return updated
-        }
-    }
-    return payload
-}
-
-// applyReasoningEffortMetadataChatCompletions applies reasoning_effort (OpenAI chat completions field)
-// when present in metadata. It avoids overwriting an existing reasoning_effort field.
-func applyReasoningEffortMetadataChatCompletions(payload []byte, metadata map[string]any, model string) []byte {
-    if len(metadata) == 0 {
-        return payload
-    }
-    if !util.ModelSupportsThinking(model) {
-        return payload
-    }
-    if gjson.GetBytes(payload, "reasoning_effort").Exists() {
-        return payload
-    }
-    if effort, ok := util.ReasoningEffortFromMetadata(metadata); ok && effort != "" {
-        if updated, err := sjson.SetBytes(payload, "reasoning_effort", effort); err == nil {
+        if updated, err := sjson.SetBytes(payload, field, effort); err == nil {
             return updated
         }
     }
@@ -232,3 +215,93 @@ func matchModelPattern(pattern, model string) bool {
     }
     return pi == len(pattern)
 }
+
+// normalizeThinkingConfig normalizes thinking-related fields in the payload
+// based on model capabilities. For models without thinking support, it strips
+// reasoning fields. For models with level-based thinking, it validates and
+// normalizes the reasoning effort level.
+func normalizeThinkingConfig(payload []byte, model string) []byte {
+    if len(payload) == 0 || model == "" {
+        return payload
+    }
+
+    if !util.ModelSupportsThinking(model) {
+        return stripThinkingFields(payload)
+    }
+
+    if util.ModelUsesThinkingLevels(model) {
+        return normalizeReasoningEffortLevel(payload, model)
+    }
+
+    return payload
+}
+
+// stripThinkingFields removes thinking-related fields from the payload for
+// models that do not support thinking.
+func stripThinkingFields(payload []byte) []byte {
+    fieldsToRemove := []string{
+        "reasoning",
+        "reasoning_effort",
+        "reasoning.effort",
+    }
+    out := payload
+    for _, field := range fieldsToRemove {
+        if gjson.GetBytes(out, field).Exists() {
+            out, _ = sjson.DeleteBytes(out, field)
+        }
+    }
+    return out
+}
+
+// normalizeReasoningEffortLevel validates and normalizes the reasoning_effort
+// or reasoning.effort field for level-based thinking models.
+func normalizeReasoningEffortLevel(payload []byte, model string) []byte {
+    out := payload
+
+    if effort := gjson.GetBytes(out, "reasoning_effort"); effort.Exists() {
+        if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
+            out, _ = sjson.SetBytes(out, "reasoning_effort", normalized)
+        }
+    }
+
+    if effort := gjson.GetBytes(out, "reasoning.effort"); effort.Exists() {
+        if normalized, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); ok {
+            out, _ = sjson.SetBytes(out, "reasoning.effort", normalized)
+        }
+    }
+
+    return out
+}
+
+// validateThinkingConfig checks for unsupported reasoning levels on level-based models.
+// Returns a statusErr with 400 when an unsupported level is supplied to avoid silently
+// downgrading requests.
+func validateThinkingConfig(payload []byte, model string) error {
+    if len(payload) == 0 || model == "" {
+        return nil
+    }
+    if !util.ModelSupportsThinking(model) || !util.ModelUsesThinkingLevels(model) {
+        return nil
+    }
+
+    levels := util.GetModelThinkingLevels(model)
+    checkField := func(path string) error {
+        if effort := gjson.GetBytes(payload, path); effort.Exists() {
+            if _, ok := util.NormalizeReasoningEffortLevel(model, effort.String()); !ok {
+                return statusErr{
+                    code: http.StatusBadRequest,
+                    msg:  fmt.Sprintf("unsupported reasoning effort level %q for model %s (supported: %s)", effort.String(), model, strings.Join(levels, ", ")),
+                }
+            }
+        }
+        return nil
+    }
+
+    if err := checkField("reasoning_effort"); err != nil {
+        return err
+    }
+    if err := checkField("reasoning.effort"); err != nil {
+        return err
+    }
+    return nil
+}
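
One behaviour worth spelling out: for models with no thinking support, normalizeThinkingConfig strips reasoning fields rather than forwarding them. A runnable sketch that walks the same field list as stripThinkingFields above (the model name is only an example of a non-thinking entry from this diff):

package main

import (
    "fmt"

    "github.com/tidwall/gjson"
    "github.com/tidwall/sjson"
)

func main() {
    // Hypothetical request aimed at a model without thinking support.
    payload := []byte(`{"model":"qwen3-coder-plus","reasoning_effort":"high"}`)

    // Same field list stripThinkingFields walks above.
    for _, field := range []string{"reasoning", "reasoning_effort", "reasoning.effort"} {
        if gjson.GetBytes(payload, field).Exists() {
            payload, _ = sjson.DeleteBytes(payload, field)
        }
    }
    fmt.Println(string(payload)) // {"model":"qwen3-coder-plus"}
}
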
@@ -51,10 +51,15 @@ func (e *QwenExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
     from := opts.SourceFormat
     to := sdktranslator.FromString("openai")
     body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-    body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
-    if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+    upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+    if upstreamModel != "" {
         body, _ = sjson.SetBytes(body, "model", upstreamModel)
     }
+    body = normalizeThinkingConfig(body, upstreamModel)
+    if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+        return resp, errValidate
+    }
     body = applyPayloadConfig(e.cfg, req.Model, body)
 
     url := strings.TrimSuffix(baseURL, "/") + "/chat/completions"
@@ -126,10 +131,15 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
     to := sdktranslator.FromString("openai")
     body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 
-    body = applyReasoningEffortMetadataChatCompletions(body, req.Metadata, req.Model)
-    if upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata); upstreamModel != "" {
+    body = applyReasoningEffortMetadata(body, req.Metadata, req.Model, "reasoning_effort")
+    upstreamModel := util.ResolveOriginalModel(req.Model, req.Metadata)
+    if upstreamModel != "" {
         body, _ = sjson.SetBytes(body, "model", upstreamModel)
     }
+    body = normalizeThinkingConfig(body, upstreamModel)
+    if errValidate := validateThinkingConfig(body, upstreamModel); errValidate != nil {
+        return nil, errValidate
+    }
     toolsResult := gjson.GetBytes(body, "tools")
     // I'm addressing the Qwen3 "poisoning" issue, which is caused by the model needing a tool to be defined. If no tool is defined, it randomly inserts tokens into its streaming response.
     // This will have no real consequences. It's just to scare Qwen3.
@@ -0,0 +1,46 @@
+package util
+
+import (
+    "github.com/tidwall/gjson"
+    "github.com/tidwall/sjson"
+)
+
+// ApplyClaudeThinkingConfig applies thinking configuration to a Claude API request payload.
+// It sets the thinking.type to "enabled" and thinking.budget_tokens to the specified budget.
+// If budget is nil or the payload already has thinking config, it returns the payload unchanged.
+func ApplyClaudeThinkingConfig(body []byte, budget *int) []byte {
+    if budget == nil {
+        return body
+    }
+    if gjson.GetBytes(body, "thinking").Exists() {
+        return body
+    }
+    if *budget <= 0 {
+        return body
+    }
+    updated := body
+    updated, _ = sjson.SetBytes(updated, "thinking.type", "enabled")
+    updated, _ = sjson.SetBytes(updated, "thinking.budget_tokens", *budget)
+    return updated
+}
+
+// ResolveClaudeThinkingConfig resolves thinking configuration from metadata for Claude models.
+// It uses the unified ResolveThinkingConfigFromMetadata and normalizes the budget.
+// Returns the normalized budget (nil if thinking should not be enabled) and whether it matched.
+func ResolveClaudeThinkingConfig(modelName string, metadata map[string]any) (*int, bool) {
+    budget, include, matched := ResolveThinkingConfigFromMetadata(modelName, metadata)
+    if !matched {
+        return nil, false
+    }
+    if include != nil && !*include {
+        return nil, true
+    }
+    if budget == nil {
+        return nil, true
+    }
+    normalized := NormalizeThinkingBudget(modelName, *budget)
+    if normalized <= 0 {
+        return nil, true
+    }
+    return &normalized, true
+}
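
A runnable sketch of the guard behaviour, using a local copy of the ApplyClaudeThinkingConfig logic added above (illustration only): an existing thinking block is never overwritten, and a nil or non-positive budget is a no-op.

package main

import (
    "fmt"

    "github.com/tidwall/gjson"
    "github.com/tidwall/sjson"
)

// Local copy of the ApplyClaudeThinkingConfig behaviour shown above, for illustration.
func applyClaudeThinkingConfig(body []byte, budget *int) []byte {
    if budget == nil {
        return body
    }
    if gjson.GetBytes(body, "thinking").Exists() {
        return body // caller already configured thinking; do not overwrite
    }
    if *budget <= 0 {
        return body
    }
    body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
    body, _ = sjson.SetBytes(body, "thinking.budget_tokens", *budget)
    return body
}

func main() {
    budget := 8192
    fresh := []byte(`{"model":"claude-sonnet-4-20250514"}`)
    preset := []byte(`{"model":"claude-sonnet-4-20250514","thinking":{"type":"enabled","budget_tokens":2048}}`)

    fmt.Println(string(applyClaudeThinkingConfig(fresh, &budget)))  // thinking block injected with 8192 tokens
    fmt.Println(string(applyClaudeThinkingConfig(preset, &budget))) // returned unchanged
}
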
@@ -1,6 +1,8 @@
 package util
 
 import (
+    "strings"
+
     "github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 )
 
@@ -67,3 +69,39 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
     }
     return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
 }
+
+// GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
+// Returns nil if the model has no thinking support or no levels defined.
+func GetModelThinkingLevels(model string) []string {
+    if model == "" {
+        return nil
+    }
+    info := registry.GetGlobalRegistry().GetModelInfo(model)
+    if info == nil || info.Thinking == nil {
+        return nil
+    }
+    return info.Thinking.Levels
+}
+
+// ModelUsesThinkingLevels reports whether the model uses discrete reasoning
+// effort levels instead of numeric budgets.
+func ModelUsesThinkingLevels(model string) bool {
+    levels := GetModelThinkingLevels(model)
+    return len(levels) > 0
+}
+
+// NormalizeReasoningEffortLevel validates and normalizes a reasoning effort
+// level for the given model. Returns false when the level is not supported.
+func NormalizeReasoningEffortLevel(model, effort string) (string, bool) {
+    levels := GetModelThinkingLevels(model)
+    if len(levels) == 0 {
+        return "", false
+    }
+    loweredEffort := strings.ToLower(strings.TrimSpace(effort))
+    for _, lvl := range levels {
+        if strings.ToLower(lvl) == loweredEffort {
+            return lvl, true
+        }
+    }
+    return "", false
+}
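
As an illustration of the level-based lookup (levels copied from the registry entries earlier in this diff, not read from the registry itself): matching is case-insensitive and returns the canonical level, and an unsupported level is what validateThinkingConfig turns into an HTTP 400.

package main

import (
    "fmt"
    "strings"
)

// Illustrative level table taken from the model registry changes above.
var modelLevels = map[string][]string{
    "gpt-5.1":           {"none", "low", "medium", "high"},
    "gpt-5.1-codex-max": {"low", "medium", "high", "xhigh"},
}

// Mirrors the intent of NormalizeReasoningEffortLevel for these two models.
func normalizeLevel(model, effort string) (string, bool) {
    lowered := strings.ToLower(strings.TrimSpace(effort))
    for _, lvl := range modelLevels[model] {
        if strings.ToLower(lvl) == lowered {
            return lvl, true
        }
    }
    return "", false
}

func main() {
    fmt.Println(normalizeLevel("gpt-5.1", " HIGH "))          // "high" true
    fmt.Println(normalizeLevel("gpt-5.1", "xhigh"))           // "" false → rejected upstream with HTTP 400
    fmt.Println(normalizeLevel("gpt-5.1-codex-max", "xhigh")) // "xhigh" true
}
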
@@ -14,62 +14,60 @@ const (
 )
 
 // NormalizeThinkingModel parses dynamic thinking suffixes on model names and returns
-// the normalized base model with extracted metadata. Supported patterns:
-//   - "-thinking-<number>" extracts a numeric budget
-//   - "-thinking-<level>" extracts a reasoning effort level (minimal/low/medium/high/xhigh/auto/none)
-//   - "-thinking" maps to a default reasoning effort of "medium"
-//   - "-reasoning" maps to dynamic budget (-1) and include_thoughts=true
-//   - "-nothinking" maps to budget=0 and include_thoughts=false
+// the normalized base model with extracted metadata. Supported pattern:
+//   - "[<value>]" where value can be:
+//   - A numeric budget (e.g., "[8192]", "[16384]")
+//   - A reasoning effort level (e.g., "[high]", "[medium]", "[low]")
+//
+// Examples:
+//   - "claude-sonnet-4-5-20250929[16384]" → budget=16384
+//   - "gpt-5.1[high]" → reasoning_effort="high"
+//   - "gemini-2.5-pro[32768]" → budget=32768
+//
+// Note: Empty brackets "[]" are not supported and will be ignored.
 func NormalizeThinkingModel(modelName string) (string, map[string]any) {
     if modelName == "" {
         return modelName, nil
     }
 
-    lower := strings.ToLower(modelName)
     baseModel := modelName
 
     var (
         budgetOverride *int
-        includeThoughts *bool
         reasoningEffort *string
         matched bool
     )
 
-    switch {
-    case strings.HasSuffix(lower, "-nothinking"):
-        baseModel = modelName[:len(modelName)-len("-nothinking")]
-        budget := 0
-        include := false
-        budgetOverride = &budget
-        includeThoughts = &include
-        matched = true
-    case strings.HasSuffix(lower, "-reasoning"):
-        baseModel = modelName[:len(modelName)-len("-reasoning")]
-        budget := -1
-        include := true
-        budgetOverride = &budget
-        includeThoughts = &include
-        matched = true
-    default:
-        if idx := strings.LastIndex(lower, "-thinking-"); idx != -1 {
-            value := modelName[idx+len("-thinking-"):]
-            if value != "" {
+    // Match "[value]" pattern at the end of the model name
+    if idx := strings.LastIndex(modelName, "["); idx != -1 {
+        if !strings.HasSuffix(modelName, "]") {
+            // Incomplete bracket, ignore
+            return baseModel, nil
+        }
+
+        value := modelName[idx+1 : len(modelName)-1] // Extract content between [ and ]
+        if value == "" {
+            // Empty brackets not supported
+            return baseModel, nil
+        }
+
+        candidateBase := modelName[:idx]
+
+        // Auto-detect: pure numeric → budget, string → reasoning effort level
         if parsed, ok := parseIntPrefix(value); ok {
-            baseModel = modelName[:idx]
+            // Numeric value: treat as thinking budget
+            baseModel = candidateBase
             budgetOverride = &parsed
             matched = true
-        } else if effort, okEffort := normalizeReasoningEffort(value); okEffort {
-            baseModel = modelName[:idx]
-            reasoningEffort = &effort
+        } else {
+            // String value: treat as reasoning effort level
+            baseModel = candidateBase
+            raw := strings.ToLower(strings.TrimSpace(value))
+            if raw != "" {
+                reasoningEffort = &raw
                 matched = true
             }
         }
-        } else if strings.HasSuffix(lower, "-thinking") {
-            baseModel = modelName[:len(modelName)-len("-thinking")]
-            effort := "medium"
-            reasoningEffort = &effort
-            matched = true
-        }
     }
 
     if !matched {
@@ -82,9 +80,6 @@ func NormalizeThinkingModel(modelName string) (string, map[string]any) {
     if budgetOverride != nil {
         metadata[ThinkingBudgetMetadataKey] = *budgetOverride
     }
-    if includeThoughts != nil {
-        metadata[ThinkingIncludeThoughtsMetadataKey] = *includeThoughts
-    }
     if reasoningEffort != nil {
         metadata[ReasoningEffortMetadataKey] = *reasoningEffort
     }
@@ -185,7 +180,7 @@ func ReasoningEffortFromMetadata(metadata map[string]any) (string, bool) {
         return "", false
     }
     if effort != nil && *effort != "" {
-        return *effort, true
+        return strings.ToLower(strings.TrimSpace(*effort)), true
     }
     if budget != nil {
         switch *budget {
@@ -207,7 +202,7 @@ func ThinkingEffortToBudget(model, effort string) (int, bool) {
     if effort == "" {
         return 0, false
     }
-    switch strings.ToLower(effort) {
+    normalized, ok := NormalizeReasoningEffortLevel(model, effort)
+    if !ok {
+        normalized = strings.ToLower(strings.TrimSpace(effort))
+    }
+    switch normalized {
     case "none":
         return 0, true
     case "auto":
@@ -312,16 +311,3 @@ func parseNumberToInt(raw any) (int, bool) {
     }
     return 0, false
 }
-
-func normalizeReasoningEffort(value string) (string, bool) {
-    if value == "" {
-        return "", false
-    }
-    effort := strings.ToLower(strings.TrimSpace(value))
-    switch effort {
-    case "minimal", "low", "medium", "high", "xhigh", "auto", "none":
-        return effort, true
-    default:
-        return "", false
-    }
-}
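
To summarize the new naming scheme, a simplified re-statement of the bracket parsing (illustration only; the real NormalizeThinkingModel also normalizes budgets and levels against the model registry, and the metadata key names below are placeholders):

package main

import (
    "fmt"
    "strconv"
    "strings"
)

// Simplified sketch of the "[value]" suffix parsing introduced above.
func normalizeThinkingModel(name string) (string, map[string]any) {
    idx := strings.LastIndex(name, "[")
    if idx == -1 || !strings.HasSuffix(name, "]") {
        return name, nil // no bracket, or incomplete bracket: leave the name alone
    }
    value := name[idx+1 : len(name)-1]
    if value == "" {
        return name, nil // empty brackets are ignored
    }
    if budget, err := strconv.Atoi(value); err == nil {
        return name[:idx], map[string]any{"thinking_budget": budget} // placeholder key name
    }
    return name[:idx], map[string]any{"reasoning_effort": strings.ToLower(strings.TrimSpace(value))} // placeholder key name
}

func main() {
    fmt.Println(normalizeThinkingModel("claude-sonnet-4-5-20250929[16384]")) // budget 16384
    fmt.Println(normalizeThinkingModel("gpt-5.1[high]"))                     // reasoning effort "high"
    fmt.Println(normalizeThinkingModel("gemini-2.5-pro[32768]"))             // budget 32768
}
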
|