Merge pull request #185 from router-for-me/thinking
Feat: Add reasoning effort support for Gemini models
@@ -84,6 +84,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-2.5-pro",
@@ -98,6 +99,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-2.5-flash-lite",
@@ -112,6 +114,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		{
 			ID: "gemini-2.5-flash-image-preview",
@@ -126,6 +129,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           8192,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
 		},
 		{
 			ID: "gemini-2.5-flash-image",
@@ -140,6 +144,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           8192,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			// image models don't support thinkingConfig; leave Thinking nil
 		},
 	}
 }
@@ -152,9 +157,8 @@ func GetGeminiCLIModels() []*ModelInfo { return GeminiModels() }
 
 // GetAIStudioModels returns the Gemini model definitions for AI Studio integrations
 func GetAIStudioModels() []*ModelInfo {
-	models := make([]*ModelInfo, 0, 8)
-	models = append(models, GeminiModels()...)
-	models = append(models,
+	base := GeminiModels()
+	return append(base,
 		&ModelInfo{
 			ID:     "gemini-pro-latest",
 			Object: "model",
@@ -168,6 +172,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		&ModelInfo{
 			ID: "gemini-flash-latest",
@@ -182,6 +187,7 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		&ModelInfo{
 			ID: "gemini-flash-lite-latest",
@@ -196,9 +202,9 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 	)
-	return models
 }
 
 // GetOpenAIModels returns the standard OpenAI model definitions
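For a quick read on what the hunks above encode, here is a minimal sketch (illustrative only, written as if it lived in the same package as GeminiModels, with fmt imported) that dumps each model's thinking range:

    // dumpThinkingRanges is a hypothetical helper, not part of this commit.
    func dumpThinkingRanges() {
        for _, m := range GeminiModels() {
            if m.Thinking == nil {
                fmt.Printf("%s: thinkingConfig unsupported\n", m.ID) // the two image models land here
                continue
            }
            fmt.Printf("%s: budget %d..%d zeroAllowed=%v dynamicAllowed=%v\n",
                m.ID, m.Thinking.Min, m.Thinking.Max, m.Thinking.ZeroAllowed, m.Thinking.DynamicAllowed)
        }
    }

Per the literals above: flash gets 0..24576, pro 128..32768 (zero disallowed), flash-lite 512..24576, and all three allow the dynamic -1 sentinel.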
@@ -45,6 +45,23 @@ type ModelInfo struct {
 	MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 	// SupportedParameters lists supported parameters
 	SupportedParameters []string `json:"supported_parameters,omitempty"`
+
+	// Thinking holds provider-specific reasoning/thinking budget capabilities.
+	// This is optional and currently used for Gemini thinking budget normalization.
+	Thinking *ThinkingSupport `json:"thinking,omitempty"`
+}
+
+// ThinkingSupport describes a model family's supported internal reasoning budget range.
+// Values are interpreted in provider-native token units.
+type ThinkingSupport struct {
+	// Min is the minimum allowed thinking budget (inclusive).
+	Min int `json:"min,omitempty"`
+	// Max is the maximum allowed thinking budget (inclusive).
+	Max int `json:"max,omitempty"`
+	// ZeroAllowed indicates whether 0 is a valid value (to disable thinking).
+	ZeroAllowed bool `json:"zero_allowed,omitempty"`
+	// DynamicAllowed indicates whether -1 is a valid value (dynamic thinking budget).
+	DynamicAllowed bool `json:"dynamic_allowed,omitempty"`
 }
 
 // ModelRegistration tracks a model's availability
@@ -652,6 +669,17 @@ func (r *ModelRegistry) GetModelProviders(modelID string) []string {
 	return result
 }
+
+// GetModelInfo returns the registered ModelInfo for the given model ID, if present.
+// Returns nil if the model is unknown to the registry.
+func (r *ModelRegistry) GetModelInfo(modelID string) *ModelInfo {
+	r.mutex.RLock()
+	defer r.mutex.RUnlock()
+	if reg, ok := r.models[modelID]; ok && reg != nil {
+		return reg.Info
+	}
+	return nil
+}
 
 // convertModelToMap converts ModelInfo to the appropriate format for different handler types
 func (r *ModelRegistry) convertModelToMap(model *ModelInfo, handlerType string) map[string]any {
 	if model == nil {
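A hedged usage sketch for the new accessor (the caller is hypothetical; the same nil checks reappear later in this PR as util.ModelSupportsThinking):

    // r is a *ModelRegistry; GetModelInfo returns nil for unknown IDs.
    info := r.GetModelInfo("gemini-2.5-pro")
    if info != nil && info.Thinking != nil {
        // safe to emit generationConfig.thinkingConfig for this model
    }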
@@ -256,10 +256,14 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		payload = util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
 	}
-	payload = disableGeminiThinkingConfig(payload, req.Model)
+	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
 	metadataAction := "generateContent"
 	if req.Metadata != nil {
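The same guard-normalize-apply shape recurs in every executor touched below (the Gemini CLI ones use ApplyGeminiCLIThinkingConfig instead). Pulled out as one illustrative helper, not actual code from this commit:

    // Sketch of the shared pattern; the real executors inline it.
    func applyThinkingOverride(model string, payload []byte, meta map[string]any) []byte {
        budget, include, ok := util.GeminiThinkingFromMetadata(meta)
        if ok && util.ModelSupportsThinking(model) {
            if budget != nil {
                norm := util.NormalizeThinkingBudget(model, *budget) // clamp to the registry range
                budget = &norm
            }
            payload = util.ApplyGeminiThinkingConfig(payload, budget, include)
        }
        // Final safety net for models without Thinking metadata.
        return util.StripThinkingConfigIfUnsupported(model, payload)
    }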
@@ -63,9 +63,14 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	to := sdktranslator.FromString("gemini-cli")
 	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if hasOverride {
+	if hasOverride && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
 	}
+	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 
 	action := "generateContent"
@@ -92,7 +97,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	var lastStatus int
 	var lastBody []byte
 
-	for _, attemptModel := range models {
+	for idx, attemptModel := range models {
 		payload := append([]byte(nil), basePayload...)
 		if action == "countTokens" {
 			payload = deleteJSONField(payload, "project")
@@ -101,7 +106,6 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 			payload = setJSONField(payload, "project", projectID)
 			payload = setJSONField(payload, "model", attemptModel)
 		}
-		payload = disableGeminiThinkingConfig(payload, attemptModel)
 
 		tok, errTok := tokenSource.Token()
 		if errTok != nil {
@@ -166,7 +170,11 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 			lastBody = append([]byte(nil), data...)
 			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
 			if httpResp.StatusCode == 429 {
-				log.Debugf("gemini cli executor: rate limited, retrying with next model")
+				if idx+1 < len(models) {
+					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
+				} else {
+					log.Debug("gemini cli executor: rate limited, no additional fallback model")
+				}
 				continue
 			}
 
@@ -196,9 +204,14 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("gemini-cli")
 	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if hasOverride {
+	if hasOverride && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		basePayload = util.ApplyGeminiCLIThinkingConfig(basePayload, budgetOverride, includeOverride)
 	}
+	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
 
 	projectID := strings.TrimSpace(stringValue(auth.Metadata, "project_id"))
@@ -219,11 +232,10 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	var lastStatus int
 	var lastBody []byte
 
-	for _, attemptModel := range models {
+	for idx, attemptModel := range models {
 		payload := append([]byte(nil), basePayload...)
 		payload = setJSONField(payload, "project", projectID)
 		payload = setJSONField(payload, "model", attemptModel)
-		payload = disableGeminiThinkingConfig(payload, attemptModel)
 
 		tok, errTok := tokenSource.Token()
 		if errTok != nil {
@@ -282,7 +294,11 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			lastBody = append([]byte(nil), data...)
 			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, string(data))
 			if httpResp.StatusCode == 429 {
-				log.Debugf("gemini cli executor: rate limited, retrying with next model")
+				if idx+1 < len(models) {
+					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
+				} else {
+					log.Debug("gemini cli executor: rate limited, no additional fallback model")
+				}
 				continue
 			}
 			err = statusErr{code: httpResp.StatusCode, msg: string(data)}
@@ -393,12 +409,16 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	budgetOverride, includeOverride, hasOverride := util.GeminiThinkingFromMetadata(req.Metadata)
 	for _, attemptModel := range models {
 		payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
-		if hasOverride {
+		if hasOverride && util.ModelSupportsThinking(req.Model) {
+			if budgetOverride != nil {
+				norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+				budgetOverride = &norm
+			}
 			payload = util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
 		}
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
-		payload = disableGeminiThinkingConfig(payload, attemptModel)
+		payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 		payload = fixGeminiCLIImageAspectRatio(attemptModel, payload)
 
 		tok, errTok := tokenSource.Token()
@@ -623,29 +643,6 @@ func cliPreviewFallbackOrder(model string) []string {
 	}
 }
 
-func disableGeminiThinkingConfig(body []byte, model string) []byte {
-	if !geminiModelDisallowsThinking(model) {
-		return body
-	}
-
-	updated := deleteJSONField(body, "request.generationConfig.thinkingConfig")
-	updated = deleteJSONField(updated, "generationConfig.thinkingConfig")
-	return updated
-}
-
-func geminiModelDisallowsThinking(model string) bool {
-	if model == "" {
-		return false
-	}
-	lower := strings.ToLower(model)
-	for _, marker := range []string{"gemini-2.5-flash-image-preview", "gemini-2.5-flash-image"} {
-		if strings.Contains(lower, marker) {
-			return true
-		}
-	}
-	return false
-}
-
 // setJSONField sets a top-level JSON field on a byte slice payload via sjson.
 func setJSONField(body []byte, key, value string) []byte {
 	if key == "" {
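Two details in the retry loops above are easy to miss: every attempt starts from a fresh copy of basePayload so one attempt's setJSONField edits cannot leak into the next, and the 429 branch now says which fallback model comes next (or that none does). Roughly:

    for idx, attemptModel := range models {
        payload := append([]byte(nil), basePayload...) // defensive copy per attempt
        payload = setJSONField(payload, "model", attemptModel)
        // ... on HTTP 429: log models[idx+1] if it exists, then continue.
    }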
@@ -78,10 +78,14 @@ func (e *GeminiExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
-	body = disableGeminiThinkingConfig(body, req.Model)
+	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 
 	action := "generateContent"
@@ -166,10 +170,14 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		body = util.ApplyGeminiThinkingConfig(body, budgetOverride, includeOverride)
 	}
-	body = disableGeminiThinkingConfig(body, req.Model)
+	body = util.StripThinkingConfigIfUnsupported(req.Model, body)
 	body = fixGeminiImageAspectRatio(req.Model, body)
 
 	url := fmt.Sprintf("%s/%s/models/%s:%s", glEndpoint, glAPIVersion, req.Model, "streamGenerateContent")
@@ -269,10 +277,14 @@ func (e *GeminiExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini")
 	translatedReq := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok {
+	if budgetOverride, includeOverride, ok := util.GeminiThinkingFromMetadata(req.Metadata); ok && util.ModelSupportsThinking(req.Model) {
+		if budgetOverride != nil {
+			norm := util.NormalizeThinkingBudget(req.Model, *budgetOverride)
+			budgetOverride = &norm
+		}
 		translatedReq = util.ApplyGeminiThinkingConfig(translatedReq, budgetOverride, includeOverride)
 	}
-	translatedReq = disableGeminiThinkingConfig(translatedReq, req.Model)
+	translatedReq = util.StripThinkingConfigIfUnsupported(req.Model, translatedReq)
 	translatedReq = fixGeminiImageAspectRatio(req.Model, translatedReq)
 	respCtx := context.WithValue(ctx, "alt", opts.Alt)
 	translatedReq, _ = sjson.DeleteBytes(translatedReq, "tools")
@@ -11,6 +11,7 @@ import (
 	"strings"
 
 	client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -136,7 +137,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 	}
 
 	// Build output Gemini CLI request JSON
-	out := `{"model":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}}`
+	out := `{"model":"","request":{"contents":[]}}`
 	out, _ = sjson.Set(out, "model", modelName)
 	if systemInstruction != nil {
 		b, _ := json.Marshal(systemInstruction)
@@ -151,21 +152,16 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 		out, _ = sjson.SetRaw(out, "request.tools", string(b))
 	}
 
-	// Map reasoning and sampling configs
-	reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
-	if reasoningEffortResult.String() == "none" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", false)
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
-	} else if reasoningEffortResult.String() == "auto" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-	} else if reasoningEffortResult.String() == "low" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
-	} else if reasoningEffortResult.String() == "medium" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
-	} else if reasoningEffortResult.String() == "high" {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
-	} else {
-		out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
-	}
+	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when type==enabled
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+		if t.Get("type").String() == "enabled" {
+			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
+				budget := int(b.Int())
+				budget = util.NormalizeThinkingBudget(modelName, budget)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.thinkingBudget", budget)
+				out, _ = sjson.Set(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+			}
+		}
+	}
 	if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
 		out, _ = sjson.Set(out, "request.generationConfig.temperature", v.Num)
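Concretely, an Anthropic-shaped request such as (example payload, not from the PR):

    {"model": "gemini-2.5-pro", "thinking": {"type": "enabled", "budget_tokens": 50000}, ...}

now produces a normalized fragment like:

    {"request": {"generationConfig": {"thinkingConfig": {"thinkingBudget": 32768, "include_thoughts": true}}}}

since 50000 exceeds the pro maximum of 32768. Requests without a thinking object, or targeting models without Thinking metadata, no longer receive a default thinkingConfig at all.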
@@ -26,32 +26,57 @@ import (
 // - []byte: The transformed request data in Gemini CLI API format
 func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bool) []byte {
 	rawJSON := bytes.Clone(inputRawJSON)
-	// Base envelope
-	out := []byte(`{"project":"","request":{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}},"model":"gemini-2.5-pro"}`)
+	// Base envelope (no default thinkingConfig)
+	out := []byte(`{"project":"","request":{"contents":[]},"model":"gemini-2.5-pro"}`)
 
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)
 
 	// Reasoning effort -> thinkingBudget/include_thoughts
+	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	if re.Exists() {
+	hasOfficialThinking := re.Exists()
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		switch re.String() {
 		case "none":
 			out, _ = sjson.DeleteBytes(out, "request.generationConfig.thinkingConfig.include_thoughts")
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 0)
 		case "auto":
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 1024)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 8192)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", 24576)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+		}
+	}
+
+	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
+			var setBudget bool
+			var normalized int
+			if v := tc.Get("thinking_budget"); v.Exists() {
+				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", normalized)
+				setBudget = true
+			}
+			if v := tc.Get("include_thoughts"); v.Exists() {
+				out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", v.Bool())
+			} else if setBudget {
+				if normalized != 0 {
+					out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+				}
+			}
 		}
-	} else {
-		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
 	}
 
 	// Temperature/top_p/top_k
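Net effect of this hunk: reasoning_effort maps to none -> budget 0 (include_thoughts removed), auto -> -1 (dynamic), low -> 1024, medium -> 8192, high -> 32768 (raised from 24576), and unrecognized -> -1, with include_thoughts=true set on every non-none branch and each fixed budget passed through util.NormalizeThinkingBudget. So for gemini-2.5-flash, whose range caps at 24576, an illustrative request like

    {"model": "gemini-2.5-flash", "reasoning_effort": "high", ...}

emits a thinkingBudget of 24576 rather than 32768.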
@@ -11,6 +11,7 @@ import (
 	"strings"
 
 	client "github.com/router-for-me/CLIProxyAPI/v6/internal/interfaces"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -129,7 +130,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 	}
 
 	// Build output Gemini CLI request JSON
-	out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
+	out := `{"contents":[]}`
 	out, _ = sjson.Set(out, "model", modelName)
 	if systemInstruction != nil {
 		b, _ := json.Marshal(systemInstruction)
@@ -144,21 +145,16 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 		out, _ = sjson.SetRaw(out, "tools", string(b))
 	}
 
-	// Map reasoning and sampling configs
-	reasoningEffortResult := gjson.GetBytes(rawJSON, "reasoning_effort")
-	if reasoningEffortResult.String() == "none" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
-	} else if reasoningEffortResult.String() == "auto" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-	} else if reasoningEffortResult.String() == "low" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
-	} else if reasoningEffortResult.String() == "medium" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
-	} else if reasoningEffortResult.String() == "high" {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
-	} else {
-		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
-	}
+	// Map Anthropic thinking -> Gemini thinkingBudget/include_thoughts when enabled
+	if t := gjson.GetBytes(rawJSON, "thinking"); t.Exists() && t.IsObject() && util.ModelSupportsThinking(modelName) {
+		if t.Get("type").String() == "enabled" {
+			if b := t.Get("budget_tokens"); b.Exists() && b.Type == gjson.Number {
+				budget := int(b.Int())
+				budget = util.NormalizeThinkingBudget(modelName, budget)
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", budget)
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+			}
+		}
+	}
 	if v := gjson.GetBytes(rawJSON, "temperature"); v.Exists() && v.Type == gjson.Number {
 		out, _ = sjson.Set(out, "generationConfig.temperature", v.Num)
@@ -26,32 +26,58 @@ import (
 // - []byte: The transformed request data in Gemini API format
 func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) []byte {
 	rawJSON := bytes.Clone(inputRawJSON)
-	// Base envelope
-	out := []byte(`{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`)
+	// Base envelope (no default thinkingConfig)
+	out := []byte(`{"contents":[]}`)
 
 	// Model
 	out, _ = sjson.SetBytes(out, "model", modelName)
 
 	// Reasoning effort -> thinkingBudget/include_thoughts
+	// Note: OpenAI official fields take precedence over extra_body.google.thinking_config
 	re := gjson.GetBytes(rawJSON, "reasoning_effort")
-	if re.Exists() {
+	hasOfficialThinking := re.Exists()
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
 		switch re.String() {
 		case "none":
 			out, _ = sjson.DeleteBytes(out, "generationConfig.thinkingConfig.include_thoughts")
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
 		case "auto":
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
+		}
+	}
+
+	// Cherry Studio extension extra_body.google.thinking_config (effective only when official fields are absent)
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+		if tc := gjson.GetBytes(rawJSON, "extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
+			var setBudget bool
+			var normalized int
+			if v := tc.Get("thinking_budget"); v.Exists() {
+				// Normalize budget to model range
+				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				setBudget = true
+			}
+			if v := tc.Get("include_thoughts"); v.Exists() {
+				out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
+			} else if setBudget {
+				if normalized != 0 {
+					out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.include_thoughts", true)
+				}
+			}
 		}
-	} else {
-		out, _ = sjson.SetBytes(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
 	}
 
 	// Temperature/top_p/top_k
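The Cherry Studio path honors a request-level extension like this illustrative body:

    {"model": "gemini-2.5-flash", "extra_body": {"google": {"thinking_config": {"thinking_budget": 2048, "include_thoughts": false}}}, ...}

It applies only when reasoning_effort is absent, the budget is normalized to the model's range, and an omitted include_thoughts defaults to true whenever the normalized budget is non-zero.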
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"strings"
 
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -15,8 +16,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 	_ = modelName // Unused but required by interface
 	_ = stream // Unused but required by interface
 
-	// Base Gemini API template
-	out := `{"contents":[],"generationConfig":{"thinkingConfig":{"include_thoughts":true}}}`
+	// Base Gemini API template (do not include thinkingConfig by default)
+	out := `{"contents":[]}`
 
 	root := gjson.ParseBytes(rawJSON)
 
@@ -242,23 +243,52 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 		out, _ = sjson.Set(out, "generationConfig.stopSequences", sequences)
 	}
 
-	if reasoningEffort := root.Get("reasoning.effort"); reasoningEffort.Exists() {
+	// OpenAI official reasoning fields take precedence
+	hasOfficialThinking := root.Get("reasoning.effort").Exists()
+	if hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+		reasoningEffort := root.Get("reasoning.effort")
 		switch reasoningEffort.String() {
 		case "none":
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", false)
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 0)
 		case "auto":
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "minimal":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 1024)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 1024))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "low":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 4096)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 4096))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "medium":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 8192)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 8192))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		case "high":
-			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", 24576)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", util.NormalizeThinkingBudget(modelName, 32768))
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
 		default:
 			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+			out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+		}
+	}
+
+	// Cherry Studio extension (applies only when official fields are missing)
+	if !hasOfficialThinking && util.ModelSupportsThinking(modelName) {
+		if tc := root.Get("extra_body.google.thinking_config"); tc.Exists() && tc.IsObject() {
+			var setBudget bool
+			var normalized int
+			if v := tc.Get("thinking_budget"); v.Exists() {
+				normalized = util.NormalizeThinkingBudget(modelName, int(v.Int()))
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", normalized)
+				setBudget = true
+			}
+			if v := tc.Get("include_thoughts"); v.Exists() {
+				out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", v.Bool())
+			} else if setBudget {
+				if normalized != 0 {
+					out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+				}
+			}
 		}
 	}
 	return []byte(out)
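The Responses-API path mirrors the Chat Completions mapping but adds a "minimal" level; e.g. this illustrative request:

    {"model": "gemini-2.5-pro", "reasoning": {"effort": "minimal"}, ...}

yields a thinkingBudget of 1024 (already above the pro minimum of 128) with include_thoughts=true.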
@@ -179,3 +179,19 @@ func GeminiThinkingFromMetadata(metadata map[string]any) (*int, *bool, bool) {
 	}
 	return budgetPtr, includePtr, matched
 }
+
+// StripThinkingConfigIfUnsupported removes thinkingConfig from the request body
+// when the target model does not advertise Thinking capability. It cleans both
+// standard Gemini and Gemini CLI JSON envelopes. This acts as a final safety net
+// in case upstream injected thinking for an unsupported model.
+func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {
+	if ModelSupportsThinking(model) || len(body) == 0 {
+		return body
+	}
+	updated := body
+	// Gemini CLI path
+	updated, _ = sjson.DeleteBytes(updated, "request.generationConfig.thinkingConfig")
+	// Standard Gemini path
+	updated, _ = sjson.DeleteBytes(updated, "generationConfig.thinkingConfig")
+	return updated
+}
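A hedged before/after for the new safety net (example body, not from the PR), using an image model that registers no Thinking metadata:

    body := []byte(`{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1},"temperature":0.7}}`)
    body = util.StripThinkingConfigIfUnsupported("gemini-2.5-flash-image", body)
    // body == {"generationConfig":{"temperature":0.7}}

Deleting both the bare generationConfig path and the CLI's request.generationConfig path lets one helper serve either envelope.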
@@ -0,0 +1,69 @@
+package util
+
+import (
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+)
+
+// ModelSupportsThinking reports whether the given model has Thinking capability
+// according to the model registry metadata (provider-agnostic).
+func ModelSupportsThinking(model string) bool {
+	if model == "" {
+		return false
+	}
+	if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
+		return info.Thinking != nil
+	}
+	return false
+}
+
+// NormalizeThinkingBudget clamps the requested thinking budget to the
+// supported range for the specified model using registry metadata only.
+// If the model is unknown or has no Thinking metadata, returns the original budget.
+// For dynamic (-1), returns -1 if DynamicAllowed; otherwise approximates mid-range
+// or min (0 if zero is allowed and mid <= 0).
+func NormalizeThinkingBudget(model string, budget int) int {
+	if budget == -1 { // dynamic
+		if found, min, max, zeroAllowed, dynamicAllowed := thinkingRangeFromRegistry(model); found {
+			if dynamicAllowed {
+				return -1
+			}
+			mid := (min + max) / 2
+			if mid <= 0 && zeroAllowed {
+				return 0
+			}
+			if mid <= 0 {
+				return min
+			}
+			return mid
+		}
+		return -1
+	}
+	if found, min, max, zeroAllowed, _ := thinkingRangeFromRegistry(model); found {
+		if budget == 0 {
+			if zeroAllowed {
+				return 0
+			}
+			return min
+		}
+		if budget < min {
+			return min
+		}
+		if budget > max {
+			return max
+		}
+		return budget
+	}
+	return budget
+}
+
+// thinkingRangeFromRegistry attempts to read thinking ranges from the model registry.
+func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zeroAllowed bool, dynamicAllowed bool) {
+	if model == "" {
+		return false, 0, 0, false, false
+	}
+	info := registry.GetGlobalRegistry().GetModelInfo(model)
+	if info == nil || info.Thinking == nil {
+		return false, 0, 0, false, false
+	}
+	return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
+}
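Worked examples of the clamping rules, using the ranges registered earlier in this commit (outputs follow directly from the code above):

    NormalizeThinkingBudget("gemini-2.5-pro", 0)     // -> 128: zero disallowed, snaps to Min
    NormalizeThinkingBudget("gemini-2.5-pro", 50)    // -> 128: below Min
    NormalizeThinkingBudget("gemini-2.5-pro", 50000) // -> 32768: above Max
    NormalizeThinkingBudget("gemini-2.5-flash", 0)   // -> 0: ZeroAllowed
    NormalizeThinkingBudget("gemini-2.5-flash", -1)  // -> -1: DynamicAllowed
    NormalizeThinkingBudget("unknown-model", 9999)   // -> 9999: no metadata, passthrough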