Merge pull request #185 from router-for-me/thinking

Feat: Add reasoning effort support for Gemini models
This commit is contained in:
Luis Pater
2025-10-29 20:27:07 +08:00
committed by GitHub
12 changed files with 305 additions and 100 deletions
+10 -4
View File
@@ -84,6 +84,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576, InputTokenLimit: 1048576,
OutputTokenLimit: 65536, OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
}, },
{ {
ID: "gemini-2.5-pro", ID: "gemini-2.5-pro",
@@ -98,6 +99,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576, InputTokenLimit: 1048576,
OutputTokenLimit: 65536, OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
}, },
{ {
ID: "gemini-2.5-flash-lite", ID: "gemini-2.5-flash-lite",
@@ -112,6 +114,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576, InputTokenLimit: 1048576,
OutputTokenLimit: 65536, OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
}, },
{ {
ID: "gemini-2.5-flash-image-preview", ID: "gemini-2.5-flash-image-preview",
@@ -126,6 +129,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576, InputTokenLimit: 1048576,
OutputTokenLimit: 8192, OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
}, },
{ {
ID: "gemini-2.5-flash-image", ID: "gemini-2.5-flash-image",
@@ -140,6 +144,7 @@ func GeminiModels() []*ModelInfo {
InputTokenLimit: 1048576, InputTokenLimit: 1048576,
OutputTokenLimit: 8192, OutputTokenLimit: 8192,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
// image models don't support thinkingConfig; leave Thinking nil
}, },
} }
} }
@@ -152,9 +157,8 @@ func GetGeminiCLIModels() []*ModelInfo { return GeminiModels() }
// GetAIStudioModels returns the Gemini model definitions for AI Studio integrations // GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
func GetAIStudioModels() []*ModelInfo { func GetAIStudioModels() []*ModelInfo {
models := make([]*ModelInfo, 0, 8) base := GeminiModels()
models = append(models, GeminiModels()...) return append(base,
models = append(models,
&ModelInfo{ &ModelInfo{
ID: "gemini-pro-latest", ID: "gemini-pro-latest",
Object: "model", Object: "model",
@@ -168,6 +172,7 @@ func GetAIStudioModels() []*ModelInfo {
InputTokenLimit: 1048576, InputTokenLimit: 1048576,
OutputTokenLimit: 65536, OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
}, },
&ModelInfo{ &ModelInfo{
ID: "gemini-flash-latest", ID: "gemini-flash-latest",
@@ -182,6 +187,7 @@ func GetAIStudioModels() []*ModelInfo {
InputTokenLimit: 1048576, InputTokenLimit: 1048576,
OutputTokenLimit: 65536, OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
}, },
&ModelInfo{ &ModelInfo{
ID: "gemini-flash-lite-latest", ID: "gemini-flash-lite-latest",
@@ -196,9 +202,9 @@ func GetAIStudioModels() []*ModelInfo {
InputTokenLimit: 1048576, InputTokenLimit: 1048576,
OutputTokenLimit: 65536, OutputTokenLimit: 65536,
SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"}, SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
Thinking: &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
}, },
) )
return models
} }
// GetOpenAIModels returns the standard OpenAI model definitions // GetOpenAIModels returns the standard OpenAI model definitions
+28
View File
@@ -45,6 +45,23 @@ type ModelInfo struct {
MaxCompletionTokens int `json:"max_completion_tokens,omitempty"` MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
// SupportedParameters lists supported parameters // SupportedParameters lists supported parameters
SupportedParameters []string `json:"supported_parameters,omitempty"` SupportedParameters []string `json:"supported_parameters,omitempty"`
// Thinking holds provider-specific reasoning/thinking budget capabilities.
// This is optional and currently used for Gemini thinking budget normalization.
Thinking *ThinkingSupport `json:"thinking,omitempty"`
}
// ThinkingSupport describes a model family's supported internal reasoning budget range.
// Values are interpreted in provider-native token units.
type ThinkingSupport struct {
// Min is the minimum allowed thinking budget (inclusive).
Min int `json:"min,omitempty"`
// Max is the maximum allowed thinking budget (inclusive).
Max int `json:"max,omitempty"`
// ZeroAllowed indicates whether 0 is a valid value (to disable thinking).
ZeroAllowed bool `json:"zero_allowed,omitempty"`
// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
} }
// ModelRegistration tracks a model's availability // ModelRegistration tracks a model's availability
@@ -652,6 +669,17 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
return result return result
} }
// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
// Returns nil if the model is unknown to the registry.
func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
r.mutex.RLock()
defer r.mutex.RUnlock()
if reg, ok := r.models[modelID]; ok && reg != nil {
return reg.Info
}
return nil
}
// convertModelToMap converts ModelInfo to the appropriate format for different handler types // convertModelToMap converts ModelInfo to the appropriate format for different handler types
func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) map[string]any { func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) map[string]any {
if model == nil { if model == nil {
@@ -256,10 +256,14 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream) payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride) payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
} }
payload = disableGeminiThinkingConfig(payload, req.Model) payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
payload = fixGeminiImageAspectRatio(req.Model, payload) payload = fixGeminiImageAspectRatio(req.Model, payload)
metadataAction := "generateContent" metadataAction := "generateContent"
if req.Metadata != nil { if req.Metadata != nil {
@@ -63,9 +63,14 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
to := sdktranslator.FromString("gemini-cli") to := sdktranslator.FromString("gemini-cli")
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if hasOverride { if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
} }
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
action := "generateContent" action := "generateContent"
@@ -92,7 +97,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
var lastStatus int var lastStatus int
var lastBody []byte var lastBody []byte
for _, attemptModel := range models { for idx, attemptModel := range models {
payload := append([]byte(nil), basePayload...) payload := append([]byte(nil), basePayload...)
if action == "countTokens" { if action == "countTokens" {
payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "project")
@@ -101,7 +106,6 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel) payload = setJSONField(payload, "model", attemptModel)
} }
payload = disableGeminiThinkingConfig(payload, attemptModel)
tok, errTok := tokenSource.Token() tok, errTok := tokenSource.Token()
if errTok != nil { if errTok != nil {
@@ -166,7 +170,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
lastBody = append([]byte(nil), data...) lastBody = append([]byte(nil), data...)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data)) log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
if httpResp.StatusCode == 429 { if httpResp.StatusCode == 429 {
log.Debugf("gemini cli executor: rate limited, retrying with next model") if idx+1 < len(models) {
log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
} else {
log.Debug("gemini cli executor: rate limited, no additional fallback model")
}
continue continue
} }
@@ -196,9 +204,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
to := sdktranslator.FromString("gemini-cli") to := sdktranslator.FromString("gemini-cli")
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if hasOverride { if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride) basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
} }
basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload) basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id")) projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id"))
@@ -219,11 +232,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
var lastStatus int var lastStatus int
var lastBody []byte var lastBody []byte
for _, attemptModel := range models { for idx, attemptModel := range models {
payload := append([]byte(nil), basePayload...) payload := append([]byte(nil), basePayload...)
payload = setJSONField(payload, "project", projectID) payload = setJSONField(payload, "project", projectID)
payload = setJSONField(payload, "model", attemptModel) payload = setJSONField(payload, "model", attemptModel)
payload = disableGeminiThinkingConfig(payload, attemptModel)
tok, errTok := tokenSource.Token() tok, errTok := tokenSource.Token()
if errTok != nil { if errTok != nil {
@@ -282,7 +294,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
lastBody = append([]byte(nil), data...) lastBody = append([]byte(nil), data...)
log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data)) log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
if httpResp.StatusCode == 429 { if httpResp.StatusCode == 429 {
log.Debugf("gemini cli executor: rate limited, retrying with next model") if idx+1 < len(models) {
log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
} else {
log.Debug("gemini cli executor: rate limited, no additional fallback model")
}
continue continue
} }
err = statusErr{code: httpResp.StatusCode, msg: string(data)} err = statusErr{code: httpResp.StatusCode, msg: string(data)}
@@ -393,12 +409,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata) budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
for _, attemptModel := range models { for _, attemptModel := range models {
payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false) payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
if hasOverride { if hasOverride && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride) payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
} }
payload = deleteJSONField(payload, "project") payload = deleteJSONField(payload, "project")
payload = deleteJSONField(payload, "model") payload = deleteJSONField(payload, "model")
payload = disableGeminiThinkingConfig(payload, attemptModel) payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
payload = fixGeminiCLIImageAspectRatio(attemptModel, payload) payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
tok, errTok := tokenSource.Token() tok, errTok := tokenSource.Token()
@@ -623,29 +643,6 @@ func cliPreviewFallbackOrder(model string) []string {
} }
} }
func disableGeminiThinkingConfig(body []byte, model string) []byte {
if !geminiModelDisallowsThinking(model) {
return body
}
updated := deleteJSONField(body, "request.generationConfig.thinkingConfig")
updated = deleteJSONField(updated, "generationConfig.thinkingConfig")
return updated
}
func geminiModelDisallowsThinking(model string) bool {
if model == "" {
return false
}
lower := strings.ToLower(model)
for _, marker := range []string{"gemini-2.5-flash-image-preview", "gemini-2.5-flash-image"} {
if strings.Contains(lower, marker) {
return true
}
}
return false
}
// setJSONField sets a top-level JSON field on a byte slice payload via sjson. // setJSONField sets a top-level JSON field on a byte slice payload via sjson.
func setJSONField(body []byte, key, value string) []byte { func setJSONField(body []byte, key, value string) []byte {
if key == "" { if key == "" {
+18 -6
View File
@@ -78,10 +78,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
} }
body = disableGeminiThinkingConfig(body, req.Model) body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body)
action := "generateContent" action := "generateContent"
@@ -166,10 +170,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true) body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride) body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
} }
body = disableGeminiThinkingConfig(body, req.Model) body = util.StripThinkingConfigIfUnsupported(req.Model, body)
body = fixGeminiImageAspectRatio(req.Model, body) body = fixGeminiImageAspectRatio(req.Model, body)
url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent") url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
@@ -269,10 +277,14 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
from := opts.SourceFormat from := opts.SourceFormat
to := sdktranslator.FromString("gemini") to := sdktranslator.FromString("gemini")
translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false) translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok { if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
if budgetOverride != nil {
norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
budgetOverride = &norm
}
translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride) translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
} }
translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model) translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq) translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
respCtx := context.WithValue(ctx, "alt", opts.Alt) respCtx := context.WithValue(ctx, "alt", opts.Alt)
translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools") translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
@@ -11,6 +11,7 @@ import (
"strings" "strings"
client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
"github.com/tidwall/sjson" "github.com/tidwall/sjson"
) )
@@ -136,7 +137,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
} }
// Build output Gemini CLI request JSON // Build output Gemini CLI request JSON
out := `{"model":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}}` out := `{"model":"","request":{"contents":[]}}`
out, _ = sjson.Set(out, "model", modelName) out, _ = sjson.Set(out, "model", modelName)
if systemInstruction != nil { if systemInstruction != nil {
b, _ := json.Marshal(systemInstruction) b, _ := json.Marshal(systemInstruction)
@@ -151,21 +152,16 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
out, _ = sjson.SetRaw(out, "request.tools", string(b)) out, _ = sjson.SetRaw(out, "request.tools", string(b))
} }
// Map reasoning and sampling configs // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort") if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if reasoningEffortResult.String() == "none" { if t.Get("type").String() == "enabled" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", false) if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0) budget := int(b.Int())
} else if reasoningEffortResult.String() == "auto" { budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
} else if reasoningEffortResult.String() == "low" { out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) }
} else if reasoningEffortResult.String() == "medium" { }
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
} else if reasoningEffortResult.String() == "high" {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
} else {
out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
} }
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number { if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num) out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num)
@@ -26,32 +26,57 @@ import (
// - []byte: The transformed request data in Gemini CLI API format // - []byte: The transformed request data in Gemini CLI API format
func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte { func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := bytes.Clone(inputRawJSON) rawJSON := bytes.Clone(inputRawJSON)
// Base envelope // Base envelope (no default thinkingConfig)
out := []byte(`{"project":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}},"model":"gemini-2.5-pro"}`) out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)
// Model // Model
out, _ = sjson.SetBytes(out, "model", modelName) out, _ = sjson.SetBytes(out, "model", modelName)
// Reasoning effort -> thinkingBudget/include_thoughts // Reasoning effort -> thinkingBudget/include_thoughts
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort") re := gjson.GetBytes(rawJSON, "reasoning_effort")
if re.Exists() { hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
switch re.String() { switch re.String() {
case "none": case "none":
out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts") out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto": case "auto":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "low": case "low":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "medium": case "medium":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
case "high": case "high":
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
default: default:
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
}
}
} }
} else {
out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
} }
// Temperature/top_p/top_k // Temperature/top_p/top_k
@@ -11,6 +11,7 @@ import (
"strings" "strings"
client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces" client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
"github.com/tidwall/sjson" "github.com/tidwall/sjson"
) )
@@ -129,7 +130,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
} }
// Build output Gemini CLI request JSON // Build output Gemini CLI request JSON
out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}` out := `{"contents":[]}`
out, _ = sjson.Set(out, "model", modelName) out, _ = sjson.Set(out, "model", modelName)
if systemInstruction != nil { if systemInstruction != nil {
b, _ := json.Marshal(systemInstruction) b, _ := json.Marshal(systemInstruction)
@@ -144,21 +145,16 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
out, _ = sjson.SetRaw(out, "tools", string(b)) out, _ = sjson.SetRaw(out, "tools", string(b))
} }
// Map reasoning and sampling configs // Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort") if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
if reasoningEffortResult.String() == "none" { if t.Get("type").String() == "enabled" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false) if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0) budget := int(b.Int())
} else if reasoningEffortResult.String() == "auto" { budget = util.NormalizeThinkingBudget(modelName, budget)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
} else if reasoningEffortResult.String() == "low" { out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) }
} else if reasoningEffortResult.String() == "medium" { }
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
} else if reasoningEffortResult.String() == "high" {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
} else {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
} }
if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number { if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
out, _ = sjson.Set(out, "generationConfig.temperature", v.Num) out, _ = sjson.Set(out, "generationConfig.temperature", v.Num)
@@ -26,32 +26,58 @@ import (
// - []byte: The transformed request data in Gemini API format // - []byte: The transformed request data in Gemini API format
func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte { func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
rawJSON := bytes.Clone(inputRawJSON) rawJSON := bytes.Clone(inputRawJSON)
// Base envelope // Base envelope (no default thinkingConfig)
out := []byte(`{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`) out := []byte(`{"contents":[]}`)
// Model // Model
out, _ = sjson.SetBytes(out, "model", modelName) out, _ = sjson.SetBytes(out, "model", modelName)
// Reasoning effort -> thinkingBudget/include_thoughts // Reasoning effort -> thinkingBudget/include_thoughts
// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
re := gjson.GetBytes(rawJSON, "reasoning_effort") re := gjson.GetBytes(rawJSON, "reasoning_effort")
if re.Exists() { hasOfficialThinking := re.Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
switch re.String() { switch re.String() {
case "none": case "none":
out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts") out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto": case "auto":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low": case "low":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium": case "medium":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high": case "high":
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 24576) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
default: default:
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
// Normalize budget to model range
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
} }
} else {
out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
} }
// Temperature/top_p/top_k // Temperature/top_p/top_k
@@ -4,6 +4,7 @@ import (
"bytes" "bytes"
"strings" "strings"
"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
"github.com/tidwall/sjson" "github.com/tidwall/sjson"
) )
@@ -15,8 +16,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
_ = modelName // Unused but required by interface _ = modelName // Unused but required by interface
_ = stream // Unused but required by interface _ = stream // Unused but required by interface
// Base Gemini API template // Base Gemini API template (do not include thinkingConfig by default)
out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}` out := `{"contents":[]}`
root := gjson.ParseBytes(rawJSON) root := gjson.ParseBytes(rawJSON)
@@ -242,23 +243,52 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences) out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
} }
if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() { // OpenAI official reasoning fields take precedence
hasOfficialThinking := root.Get("reasoning.effort").Exists()
if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
reasoningEffort := root.Get("reasoning.effort")
switch reasoningEffort.String() { switch reasoningEffort.String() {
case "none": case "none":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
case "auto": case "auto":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "minimal": case "minimal":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "low": case "low":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "medium": case "medium":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
case "high": case "high":
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
default: default:
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1) out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
// Cherry Studio extension (applies only when official fields are missing)
if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
var setBudget bool
var normalized int
if v := tc.Get("thinking_budget"); v.Exists() {
normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
setBudget = true
}
if v := tc.Get("include_thoughts"); v.Exists() {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
} else if setBudget {
if normalized != 0 {
out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
}
}
} }
} }
return []byte(out) return []byte(out)
+16
View File
@@ -179,3 +179,19 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
} }
return budgetPtr, includePtr, matched return budgetPtr, includePtr, matched
} }
// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
// when the target model does not advertise Thinking capability. It cleans both
// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net
// in case upstream injected thinking for an unsupported model.
func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
if ModelSupportsThinking(model) || len(body) == 0 {
return body
}
updated := body
// Gemini CLI path
updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig")
// Standard Gemini path
updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig")
return updated
}
+69
View File
@@ -0,0 +1,69 @@
package util
import (
"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
)
// ModelSupportsThinking reports whether the given model has Thinking capability
// according to the model registry metadata (provider-agnostic).
func ModelSupportsThinking(model string) bool {
if model == "" {
return false
}
if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
return info.Thinking != nil
}
return false
}
// NormalizeThinkingBudget clamps the requested thinking budget to the
// supported range for the specified model using registry metadata only.
// If the model is unknown or has no Thinking metadata, returns the original budget.
// For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range
// or min (0 if zero is allowed and mid <= 0).
func NormalizeThinkingBudget(model string, budget int) int {
if budget == -1 { // dynamic
if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
if dynamicAllowed {
return -1
}
mid := (min + max) / 2
if mid <= 0 && zeroAllowed {
return 0
}
if mid <= 0 {
return min
}
return mid
}
return -1
}
if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
if budget == 0 {
if zeroAllowed {
return 0
}
return min
}
if budget < min {
return min
}
if budget > max {
return max
}
return budget
}
return budget
}
// thinkingRangeFromRegistry attempts to read thinking ranges from the model registry.
func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) {
if model == "" {
return false, 0, 0, false, false
}
info := registry.GetGlobalRegistry().GetModelInfo(model)
if info == nil || info.Thinking == nil {
return false, 0, 0, false, false
}
return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
}