Merge pull request #280 from ben-vargas/feat-enable-gemini-3-cli
feat: enable Gemini 3 Pro Preview with OAuth support
```diff
@@ -170,21 +170,21 @@ func GetGeminiCLIModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 		// {
 		// 	ID:                         "gemini-3-pro-preview-11-2025",
 		// 	Object:                     "model",
 		// 	Created:                    time.Now().Unix(),
 		// 	OwnedBy:                    "google",
 		// 	Type:                       "gemini",
 		// 	Name:                       "models/gemini-3-pro-preview-11-2025",
 		// 	Version:                    "3",
 		// 	DisplayName:                "Gemini 3 Pro Preview 11-2025",
 		// 	Description:                "Latest preview of Gemini Pro",
 		// 	InputTokenLimit:            1048576,
 		// 	OutputTokenLimit:           65536,
 		// 	SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 		// 	Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		// },
 		{
 			ID:                         "gemini-3-pro-preview",
 			Object:                     "model",
 			Created:                    time.Now().Unix(),
 			OwnedBy:                    "google",
 			Type:                       "gemini",
 			Name:                       "models/gemini-3-pro-preview",
 			Version:                    "3.0",
 			DisplayName:                "Gemini 3 Pro Preview",
 			Description:                "Gemini 3 Pro Preview",
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 	}
 }
```
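The `Thinking` metadata is what functionally separates the new entry from the 2.5-generation models above it: gemini-3-pro-preview declares `Min: 128` with `ZeroAllowed: false`, so thinking cannot be switched off entirely, while `DynamicAllowed: true` keeps the `-1` "let the model decide" budget. A minimal sketch of how a caller could normalize a requested budget against these fields (`clampThinkingBudget` is hypothetical and not part of this PR):

```go
// Hypothetical helper, not in this PR: normalize a requested thinking
// budget against a model's declared ThinkingSupport limits.
func clampThinkingBudget(ts *ThinkingSupport, requested int) (int, bool) {
	if ts == nil {
		return 0, false // model declares no thinking support
	}
	switch {
	case requested == -1:
		return -1, ts.DynamicAllowed // -1 asks the model to pick its own budget
	case requested == 0:
		return 0, ts.ZeroAllowed // rejected for gemini-3-pro-preview (ZeroAllowed: false)
	case requested < ts.Min:
		return ts.Min, true // raise to the floor (128 for gemini-3-pro-preview)
	case requested > ts.Max:
		return ts.Max, true // cap at the ceiling (32768)
	default:
		return requested, true
	}
}
```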
```diff
@@ -99,7 +99,15 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	var lastStatus int
 	var lastBody []byte
+
+	// Get max retry count from config, default to 3 if not set
+	maxRetries := e.cfg.RequestRetry
+	if maxRetries <= 0 {
+		maxRetries = 3
+	}
+
 	for idx, attemptModel := range models {
+		// Inner retry loop for 429 errors on the same model
+		for retryCount := 0; retryCount <= maxRetries; retryCount++ {
 			payload := append([]byte(nil), basePayload...)
 			if action == "countTokens" {
 				payload = deleteJSONField(payload, "project")
```
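Note the loop bound `retryCount <= maxRetries`: each model gets one initial attempt plus up to `maxRetries` retries, so the default of 3 means up to four requests per model before falling through to the next fallback. Combined with the 60-second cap enforced by `parseRetryDelay` (added further down), the time spent sleeping per model is bounded; a quick sketch of that bound, not part of the PR:

```go
// Worst-case sleep per model implied by the retry loop: at most maxRetries
// waits, each capped at 60s by parseRetryDelay. Defaults give 3m per model.
func maxSleepPerModel(maxRetries int) time.Duration {
	const maxDelay = 60 * time.Second // cap enforced by parseRetryDelay
	return time.Duration(maxRetries) * maxDelay
}
```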
```diff
@@ -171,19 +179,45 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 			lastStatus = httpResp.StatusCode
 			lastBody = append([]byte(nil), data...)
 			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
-			if httpResp.StatusCode == 429 {
-				if idx+1 < len(models) {
-					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
-				} else {
-					log.Debug("gemini cli executor: rate limited, no additional fallback model")
-				}
-				continue
-			}
+
+			// Handle 429 rate limit errors with retry
+			if httpResp.StatusCode == 429 {
+				if retryCount < maxRetries {
+					// Parse retry delay from Google's response
+					retryDelay := parseRetryDelay(data)
+					log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
+
+					// Wait for the specified delay
+					select {
+					case <-time.After(retryDelay):
+						// Continue to next retry iteration
+						continue
+					case <-ctx.Done():
+						// Context cancelled, return immediately
+						err = ctx.Err()
+						return resp, err
+					}
+				} else {
+					// Exhausted retries for this model, try next model if available
+					if idx+1 < len(models) {
+						log.Infof("gemini cli executor: rate limited, exhausted %d retries for model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
+						break // Break inner loop to try next model
+					} else {
+						log.Infof("gemini cli executor: rate limited, exhausted %d retries for model %s, no additional fallback model", maxRetries, attemptModel)
+						// No more models to try, will return error below
+					}
+				}
+			} else {
+				// Non-429 error, don't retry this model
+				err = statusErr{code: httpResp.StatusCode, msg: string(data)}
+				return resp, err
+			}
+
+			// Break inner loop if we hit this point (no retry needed or exhausted retries)
+			break
+		}
 	}
 
 	if len(lastBody) > 0 {
 		appendAPIResponseChunk(ctx, e.cfg, lastBody)
 	}
```
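The `select` over `time.After` and `ctx.Done()` keeps the backoff cancellable: a client that disconnects mid-retry is not left holding a sleeping goroutine. The same pattern is inlined again in `ExecuteStream` below; factored out it is the standard context-aware sleep (a sketch, the PR deliberately inlines it instead):

```go
// sleepCtx waits for d, or returns early with the context's error if the
// caller cancels first. Equivalent to the inline select used above.
func sleepCtx(ctx context.Context, d time.Duration) error {
	select {
	case <-time.After(d):
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
```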
```diff
@@ -235,8 +269,21 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	var lastStatus int
 	var lastBody []byte
+
+	// Get max retry count from config, default to 3 if not set
+	maxRetries := e.cfg.RequestRetry
+	if maxRetries <= 0 {
+		maxRetries = 3
+	}
+
 	for idx, attemptModel := range models {
-		payload := append([]byte(nil), basePayload...)
+		var httpResp *http.Response
+		var payload []byte
+		var errDo error
+		shouldContinueToNextModel := false
+
+		// Inner retry loop for 429 errors on the same model
+		for retryCount := 0; retryCount <= maxRetries; retryCount++ {
+			payload = append([]byte(nil), basePayload...)
 			payload = setJSONField(payload, "project", projectID)
 			payload = setJSONField(payload, "model", attemptModel)
```
```diff
@@ -275,7 +322,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 				AuthValue: authValue,
 			})
 
-			httpResp, errDo := httpClient.Do(reqHTTP)
+			httpResp, errDo = httpClient.Do(reqHTTP)
 			if errDo != nil {
 				recordAPIResponseError(ctx, e.cfg, errDo)
 				err = errDo
```
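The one-character `:=` to `=` change above is load-bearing: with `:=`, the call would declare fresh `httpResp` and `errDo` variables scoped to the inner retry loop, shadowing the ones declared at the top of the model loop, so the post-loop check on `httpResp` would always see `nil`. The pitfall, distilled (illustrative, not from the PR):

```go
package main

import "fmt"

func get() (string, error) { return "ok", nil }

func main() {
	var result string
	for i := 0; i < 1; i++ {
		result, err := get() // := declares a NEW result scoped to the loop body
		_, _ = result, err
	}
	fmt.Println(result == "") // true: the outer result was never assigned
}
```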
```diff
@@ -296,18 +343,61 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			lastStatus = httpResp.StatusCode
 			lastBody = append([]byte(nil), data...)
 			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+
+			// Handle 429 rate limit errors with retry
 			if httpResp.StatusCode == 429 {
-				if idx+1 < len(models) {
-					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
-				} else {
-					log.Debug("gemini cli executor: rate limited, no additional fallback model")
-				}
+				if retryCount < maxRetries {
+					// Parse retry delay from Google's response
+					retryDelay := parseRetryDelay(data)
+					log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
+
+					// Wait for the specified delay
+					select {
+					case <-time.After(retryDelay):
+						// Continue to next retry iteration
+						continue
+					case <-ctx.Done():
+						// Context cancelled, return immediately
+						err = ctx.Err()
+						return nil, err
+					}
+				} else {
+					// Exhausted retries for this model, try next model if available
+					if idx+1 < len(models) {
+						log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
+						shouldContinueToNextModel = true
+						break // Break inner loop to try next model
+					} else {
+						log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, no additional fallback model", maxRetries, attemptModel)
+						// No more models to try, will return error below
+					}
+				}
+			} else {
+				// Non-429 error, don't retry this model
+				err = statusErr{code: httpResp.StatusCode, msg: string(data)}
+				return nil, err
+			}
+
+			// Break inner loop if we hit this point (no retry needed or exhausted retries)
+			break
+		}
+
+		// Success - httpResp.StatusCode is 2xx, break out of retry loop
+		// and proceed to streaming logic below
+		break
+	}
+
+	// If we need to try the next fallback model, skip streaming logic
+	if shouldContinueToNextModel {
+		continue
+	}
+
+	// If we have a failed response (non-2xx), don't attempt streaming
+	// Continue outer loop to try next model or return error
+	if httpResp == nil || httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
+		continue
+	}
+
 	out := make(chan cliproxyexecutor.StreamChunk)
 	stream = out
 	go func(resp *http.Response, reqBody []byte, attempt string) {
```
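Passing `resp`, `reqBody`, and `attempt` into the goroutine as parameters pins each attempt's values at spawn time instead of sharing the enclosing loop's variables; before Go 1.22 this also guarded against the classic loop-variable capture bug. A self-contained illustration (not from the PR):

```go
package main

import (
	"fmt"
	"sync"
)

func main() {
	models := []string{"gemini-3-pro-preview", "gemini-2.5-pro"}
	var wg sync.WaitGroup
	for _, m := range models {
		wg.Add(1)
		go func(model string) { // value pinned at spawn, like go func(resp, reqBody, attempt)
			defer wg.Done()
			fmt.Println(model)
		}(m)
	}
	wg.Wait()
}
```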
```diff
@@ -769,3 +859,48 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
 	}
 	return rawJSON
 }
+
+// parseRetryDelay extracts the retry delay from a Google API 429 error response.
+// The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
+// Returns the duration to wait, or a default duration if parsing fails.
+func parseRetryDelay(errorBody []byte) time.Duration {
+	const defaultDelay = 1 * time.Second
+	const maxDelay = 60 * time.Second
+
+	// Try to parse the retryDelay from the error response
+	// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
+	details := gjson.GetBytes(errorBody, "error.details")
+	if !details.Exists() || !details.IsArray() {
+		log.Debugf("parseRetryDelay: no error.details found, using default delay %v", defaultDelay)
+		return defaultDelay
+	}
+
+	for _, detail := range details.Array() {
+		typeVal := detail.Get("@type").String()
+		if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
+			retryDelay := detail.Get("retryDelay").String()
+			if retryDelay != "" {
+				// Parse duration string like "0.847655010s"
+				duration, err := time.ParseDuration(retryDelay)
+				if err != nil {
+					log.Debugf("parseRetryDelay: failed to parse duration %q: %v, using default", retryDelay, err)
+					return defaultDelay
+				}
+				// Cap at maxDelay to prevent excessive waits
+				if duration > maxDelay {
+					log.Debugf("parseRetryDelay: capping delay from %v to %v", duration, maxDelay)
+					return maxDelay
+				}
+				if duration < 0 {
+					log.Debugf("parseRetryDelay: negative delay %v, using default", duration)
+					return defaultDelay
+				}
+				log.Debugf("parseRetryDelay: using delay %v from API response", duration)
+				return duration
+			}
+		}
+	}
+
+	log.Debugf("parseRetryDelay: no RetryInfo found, using default delay %v", defaultDelay)
+	return defaultDelay
+}
```
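For reference, the payload shape `parseRetryDelay` walks is Google's standard `google.rpc.RetryInfo` error detail. The values below are illustrative; the structure matches the paths queried above (`error.details[].@type` and `retryDelay`):

```json
{
  "error": {
    "code": 429,
    "status": "RESOURCE_EXHAUSTED",
    "message": "Resource has been exhausted (e.g. check quota).",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.RetryInfo",
        "retryDelay": "0.847655010s"
      }
    ]
  }
}
```

`time.ParseDuration` accepts the fractional-seconds form directly, so "0.847655010s" parses to roughly 848ms without any special casing.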
```diff
@@ -88,6 +88,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		}
 	}
 
+	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
+	// This matches the official Gemini CLI behavior which always sends:
+	// { thinkingBudget: -1, includeThoughts: true }
+	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
+	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
+		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+	}
+
 	// Temperature/top_p/top_k
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
```
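The effect on a translated request that previously carried no thinking settings looks roughly like this (abbreviated and illustrative; the `thinkingConfig` object is exactly what the block above injects):

```json
{
  "model": "gemini-3-pro-preview",
  "request": {
    "generationConfig": {
      "thinkingConfig": {
        "thinkingBudget": -1,
        "include_thoughts": true
      }
    }
  }
}
```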
```diff
@@ -6,6 +6,7 @@ import (
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
```
```diff
@@ -294,6 +295,17 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 			}
 		}
 	}
 
+	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
+	// This matches the official Gemini CLI behavior which always sends:
+	// { thinkingBudget: -1, includeThoughts: true }
+	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
+	if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+		log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
+	}
+
 	result := []byte(out)
 	result = common.AttachDefaultSafetySettings(result, "safetySettings")
 	return result
```
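A sketch of a unit test for the injection above (hypothetical and not part of this PR; it assumes the function takes a trailing bool like its CLI counterpart and returns the translated JSON bytes):

```go
func TestGemini3DefaultThinkingConfig(t *testing.T) {
	in := []byte(`{"model":"gemini-3-pro-preview","input":"hi"}`)
	out := ConvertOpenAIResponsesRequestToGemini("gemini-3-pro-preview", in, false)
	if got := gjson.GetBytes(out, "generationConfig.thinkingConfig.thinkingBudget").Int(); got != -1 {
		t.Fatalf("thinkingBudget = %d, want -1", got)
	}
	if !gjson.GetBytes(out, "generationConfig.thinkingConfig.include_thoughts").Bool() {
		t.Fatal("include_thoughts not set")
	}
}
```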