diff --git a/internal/registry/model_definitions.go b/internal/registry/model_definitions.go
index eea00daf..f7c41224 100644
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -170,21 +170,21 @@ func GetGeminiCLIModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
-		// {
-		// 	ID:                         "gemini-3-pro-preview-11-2025",
-		// 	Object:                     "model",
-		// 	Created:                    time.Now().Unix(),
-		// 	OwnedBy:                    "google",
-		// 	Type:                       "gemini",
-		// 	Name:                       "models/gemini-3-pro-preview-11-2025",
-		// 	Version:                    "3",
-		// 	DisplayName:                "Gemini 3 Pro Preview 11-2025",
-		// 	Description:                "Latest preview of Gemini Pro",
-		// 	InputTokenLimit:            1048576,
-		// 	OutputTokenLimit:           65536,
-		// 	SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-		// 	Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
-		// },
+		{
+			ID:                         "gemini-3-pro-preview",
+			Object:                     "model",
+			Created:                    time.Now().Unix(),
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Preview",
+			Description:                "Gemini 3 Pro Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		},
 	}
 }
 
diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go
index 9a2c2602..2f48871b 100644
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -99,89 +99,123 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	var lastStatus int
 	var lastBody []byte
 
+	// Get max retry count from config, default to 3 if not set
+	maxRetries := e.cfg.RequestRetry
+	if maxRetries <= 0 {
+		maxRetries = 3
+	}
+
 	for idx, attemptModel := range models {
-		payload := append([]byte(nil), basePayload...)
-		if action == "countTokens" {
-			payload = deleteJSONField(payload, "project")
-			payload = deleteJSONField(payload, "model")
-		} else {
-			payload = setJSONField(payload, "project", projectID)
-			payload = setJSONField(payload, "model", attemptModel)
-		}
-
-		tok, errTok := tokenSource.Token()
-		if errTok != nil {
-			err = errTok
-			return resp, err
-		}
-		updateGeminiCLITokenMetadata(auth, baseTokenData, tok)
-
-		url := fmt.Sprintf("%s/%s:%s", codeAssistEndpoint, codeAssistVersion, action)
-		if opts.Alt != "" && action != "countTokens" {
-			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
-		}
-
-		reqHTTP, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(payload))
-		if errReq != nil {
-			err = errReq
-			return resp, err
-		}
-		reqHTTP.Header.Set("Content-Type", "application/json")
-		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
-		applyGeminiCLIHeaders(reqHTTP)
-		reqHTTP.Header.Set("Accept", "application/json")
-		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
-			URL:       url,
-			Method:    http.MethodPost,
-			Headers:   reqHTTP.Header.Clone(),
-			Body:      payload,
-			Provider:  e.Identifier(),
-			AuthID:    authID,
-			AuthLabel: authLabel,
-			AuthType:  authType,
-			AuthValue: authValue,
-		})
-
-		httpResp, errDo := httpClient.Do(reqHTTP)
-		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
-			err = errDo
-			return resp, err
-		}
-
-		data, errRead := io.ReadAll(httpResp.Body)
-		if errClose := httpResp.Body.Close(); errClose != nil {
-			log.Errorf("gemini cli executor: close response body error: %v", errClose)
-		}
-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		if errRead != nil {
-			recordAPIResponseError(ctx, e.cfg, errRead)
-			err = errRead
-			return resp, err
-		}
-		appendAPIResponseChunk(ctx, e.cfg, data)
-		if httpResp.StatusCode >= 200 && httpResp.StatusCode < 300 {
-			reporter.publish(ctx, parseGeminiCLIUsage(data))
-			var param any
-			out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), payload, data, &param)
-			resp = cliproxyexecutor.Response{Payload: []byte(out)}
-			return resp, nil
-		}
-
-		lastStatus = httpResp.StatusCode
-		lastBody = append([]byte(nil), data...)
-		log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
-		if httpResp.StatusCode == 429 {
-			if idx+1 < len(models) {
-				log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
+		// Inner retry loop for 429 errors on the same model
+		for retryCount := 0; retryCount <= maxRetries; retryCount++ {
+			payload := append([]byte(nil), basePayload...)
+			if action == "countTokens" {
+				payload = deleteJSONField(payload, "project")
+				payload = deleteJSONField(payload, "model")
 			} else {
-				log.Debug("gemini cli executor: rate limited, no additional fallback model")
+				payload = setJSONField(payload, "project", projectID)
+				payload = setJSONField(payload, "model", attemptModel)
 			}
-			continue
-		}
 
-		err = statusErr{code: httpResp.StatusCode, msg: string(data)}
-		return resp, err
+			tok, errTok := tokenSource.Token()
+			if errTok != nil {
+				err = errTok
+				return resp, err
+			}
+			updateGeminiCLITokenMetadata(auth, baseTokenData, tok)
+
+			url := fmt.Sprintf("%s/%s:%s", codeAssistEndpoint, codeAssistVersion, action)
+			if opts.Alt != "" && action != "countTokens" {
+				url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
+			}
+
+			reqHTTP, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(payload))
+			if errReq != nil {
+				err = errReq
+				return resp, err
+			}
+			reqHTTP.Header.Set("Content-Type", "application/json")
+			reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
+			applyGeminiCLIHeaders(reqHTTP)
+			reqHTTP.Header.Set("Accept", "application/json")
+			recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+				URL:       url,
+				Method:    http.MethodPost,
+				Headers:   reqHTTP.Header.Clone(),
+				Body:      payload,
+				Provider:  e.Identifier(),
+				AuthID:    authID,
+				AuthLabel: authLabel,
+				AuthType:  authType,
+				AuthValue: authValue,
+			})
+
+			httpResp, errDo := httpClient.Do(reqHTTP)
+			if errDo != nil {
+				recordAPIResponseError(ctx, e.cfg, errDo)
+				err = errDo
+				return resp, err
+			}
+
+			data, errRead := io.ReadAll(httpResp.Body)
+			if errClose := httpResp.Body.Close(); errClose != nil {
+				log.Errorf("gemini cli executor: close response body error: %v", errClose)
+			}
+			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+			if errRead != nil {
+				recordAPIResponseError(ctx, e.cfg, errRead)
+				err = errRead
+				return resp, err
+			}
+			appendAPIResponseChunk(ctx, e.cfg, data)
+			if httpResp.StatusCode >= 200 && httpResp.StatusCode < 300 {
+				reporter.publish(ctx, parseGeminiCLIUsage(data))
+				var param any
+				out := sdktranslator.TranslateNonStream(respCtx, to, from, attemptModel, bytes.Clone(opts.OriginalRequest), payload, data, &param)
+				resp = cliproxyexecutor.Response{Payload: []byte(out)}
+				return resp, nil
+			}
+
+			lastStatus = httpResp.StatusCode
+			lastBody = append([]byte(nil), data...)
+			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+
+			// Handle 429 rate limit errors with retry
+			if httpResp.StatusCode == 429 {
+				if retryCount < maxRetries {
+					// Parse retry delay from Google's response
+					retryDelay := parseRetryDelay(data)
+					log.Infof("gemini cli executor: rate limited (429), retrying model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
+
+					// Wait for the specified delay
+					select {
+					case <-time.After(retryDelay):
+						// Continue to next retry iteration
+						continue
+					case <-ctx.Done():
+						// Context cancelled, return immediately
+						err = ctx.Err()
+						return resp, err
+					}
+				} else {
+					// Exhausted retries for this model, try next model if available
+					if idx+1 < len(models) {
+						log.Infof("gemini cli executor: rate limited, exhausted %d retries for model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
+						break // Break inner loop to try next model
+					} else {
+						log.Infof("gemini cli executor: rate limited, exhausted %d retries for model %s, no additional fallback model", maxRetries, attemptModel)
+						// No more models to try, will return error below
+					}
+				}
+			} else {
+				// Non-429 error, don't retry this model
+				err = statusErr{code: httpResp.StatusCode, msg: string(data)}
+				return resp, err
+			}
+
+			// Break inner loop if we hit this point (no retry needed or exhausted retries)
+			break
+		}
 	}
 
 	if len(lastBody) > 0 {
@@ -235,77 +269,133 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	var lastStatus int
 	var lastBody []byte
 
+	// Get max retry count from config, default to 3 if not set
+	maxRetries := e.cfg.RequestRetry
+	if maxRetries <= 0 {
+		maxRetries = 3
+	}
+
 	for idx, attemptModel := range models {
-		payload := append([]byte(nil), basePayload...)
-		payload = setJSONField(payload, "project", projectID)
-		payload = setJSONField(payload, "model", attemptModel)
+		var httpResp *http.Response
+		var payload []byte
+		var errDo error
+		shouldContinueToNextModel := false
 
-		tok, errTok := tokenSource.Token()
-		if errTok != nil {
-			err = errTok
-			return nil, err
-		}
-		updateGeminiCLITokenMetadata(auth, baseTokenData, tok)
+		// Inner retry loop for 429 errors on the same model
+		for retryCount := 0; retryCount <= maxRetries; retryCount++ {
+			payload = append([]byte(nil), basePayload...)
+			payload = setJSONField(payload, "project", projectID)
+			payload = setJSONField(payload, "model", attemptModel)
 
-		url := fmt.Sprintf("%s/%s:%s", codeAssistEndpoint, codeAssistVersion, "streamGenerateContent")
-		if opts.Alt == "" {
-			url = url + "?alt=sse"
-		} else {
-			url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
-		}
-
-		reqHTTP, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(payload))
-		if errReq != nil {
-			err = errReq
-			return nil, err
-		}
-		reqHTTP.Header.Set("Content-Type", "application/json")
-		reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
-		applyGeminiCLIHeaders(reqHTTP)
-		reqHTTP.Header.Set("Accept", "text/event-stream")
-		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
-			URL:       url,
-			Method:    http.MethodPost,
-			Headers:   reqHTTP.Header.Clone(),
-			Body:      payload,
-			Provider:  e.Identifier(),
-			AuthID:    authID,
-			AuthLabel: authLabel,
-			AuthType:  authType,
-			AuthValue: authValue,
-		})
-
-		httpResp, errDo := httpClient.Do(reqHTTP)
-		if errDo != nil {
-			recordAPIResponseError(ctx, e.cfg, errDo)
-			err = errDo
-			return nil, err
-		}
-		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
-		if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
-			data, errRead := io.ReadAll(httpResp.Body)
-			if errClose := httpResp.Body.Close(); errClose != nil {
-				log.Errorf("gemini cli executor: close response body error: %v", errClose)
-			}
-			if errRead != nil {
-				recordAPIResponseError(ctx, e.cfg, errRead)
-				err = errRead
+			tok, errTok := tokenSource.Token()
+			if errTok != nil {
+				err = errTok
 				return nil, err
 			}
-			appendAPIResponseChunk(ctx, e.cfg, data)
-			lastStatus = httpResp.StatusCode
-			lastBody = append([]byte(nil), data...)
-			log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
-			if httpResp.StatusCode == 429 {
-				if idx+1 < len(models) {
-					log.Debugf("gemini cli executor: rate limited, retrying with next model: %s", models[idx+1])
-				} else {
-					log.Debug("gemini cli executor: rate limited, no additional fallback model")
-				}
-				continue
+			updateGeminiCLITokenMetadata(auth, baseTokenData, tok)
+
+			url := fmt.Sprintf("%s/%s:%s", codeAssistEndpoint, codeAssistVersion, "streamGenerateContent")
+			if opts.Alt == "" {
+				url = url + "?alt=sse"
+			} else {
+				url = url + fmt.Sprintf("?$alt=%s", opts.Alt)
 			}
-			err = statusErr{code: httpResp.StatusCode, msg: string(data)}
-			return nil, err
+
+			reqHTTP, errReq := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(payload))
+			if errReq != nil {
+				err = errReq
+				return nil, err
+			}
+			reqHTTP.Header.Set("Content-Type", "application/json")
+			reqHTTP.Header.Set("Authorization", "Bearer "+tok.AccessToken)
+			applyGeminiCLIHeaders(reqHTTP)
+			reqHTTP.Header.Set("Accept", "text/event-stream")
+			recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+				URL:       url,
+				Method:    http.MethodPost,
+				Headers:   reqHTTP.Header.Clone(),
+				Body:      payload,
+				Provider:  e.Identifier(),
+				AuthID:    authID,
+				AuthLabel: authLabel,
+				AuthType:  authType,
+				AuthValue: authValue,
+			})
+
+			httpResp, errDo = httpClient.Do(reqHTTP)
+			if errDo != nil {
+				recordAPIResponseError(ctx, e.cfg, errDo)
+				err = errDo
+				return nil, err
+			}
+			recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+			if httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
+				data, errRead := io.ReadAll(httpResp.Body)
+				if errClose := httpResp.Body.Close(); errClose != nil {
+					log.Errorf("gemini cli executor: close response body error: %v", errClose)
+				}
+				if errRead != nil {
+					recordAPIResponseError(ctx, e.cfg, errRead)
+					err = errRead
+					return nil, err
+				}
+				appendAPIResponseChunk(ctx, e.cfg, data)
+				lastStatus = httpResp.StatusCode
+				lastBody = append([]byte(nil), data...)
+				log.Debugf("request error, error status: %d, error body: %s", httpResp.StatusCode, summarizeErrorBody(httpResp.Header.Get("Content-Type"), data))
+
+				// Handle 429 rate limit errors with retry
+				if httpResp.StatusCode == 429 {
+					if retryCount < maxRetries {
+						// Parse retry delay from Google's response
+						retryDelay := parseRetryDelay(data)
+						log.Infof("gemini cli executor: rate limited (429), retrying stream model %s in %v (attempt %d/%d)", attemptModel, retryDelay, retryCount+1, maxRetries)
+
+						// Wait for the specified delay
+						select {
+						case <-time.After(retryDelay):
+							// Continue to next retry iteration
+							continue
+						case <-ctx.Done():
+							// Context cancelled, return immediately
+							err = ctx.Err()
+							return nil, err
+						}
+					} else {
+						// Exhausted retries for this model, try next model if available
+						if idx+1 < len(models) {
+							log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, trying fallback model: %s", maxRetries, attemptModel, models[idx+1])
+							shouldContinueToNextModel = true
+							break // Break inner loop to try next model
+						} else {
+							log.Infof("gemini cli executor: rate limited, exhausted %d retries for stream model %s, no additional fallback model", maxRetries, attemptModel)
+							// No more models to try, will return error below
+						}
+					}
+				} else {
+					// Non-429 error, don't retry this model
+					err = statusErr{code: httpResp.StatusCode, msg: string(data)}
+					return nil, err
+				}
+
+				// Break inner loop if we hit this point (no retry needed or exhausted retries)
+				break
+			}
+
+			// Success - httpResp.StatusCode is 2xx, break out of retry loop
+			// and proceed to streaming logic below
+			break
+		}
+
+		// If we need to try the next fallback model, skip streaming logic
+		if shouldContinueToNextModel {
+			continue
+		}
+
+		// If we have a failed response (non-2xx), don't attempt streaming
+		// Continue outer loop to try next model or return error
+		if httpResp == nil || httpResp.StatusCode < 200 || httpResp.StatusCode >= 300 {
+			continue
 		}
 
 		out := make(chan cliproxyexecutor.StreamChunk)
@@ -769,3 +859,48 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte {
 	}
 	return rawJSON
 }
+
+// parseRetryDelay extracts the retry delay from a Google API 429 error response.
+// The error response contains a RetryInfo.retryDelay field in the format "0.847655010s".
+// Returns the duration to wait, or a default duration if parsing fails.
+func parseRetryDelay(errorBody []byte) time.Duration {
+	const defaultDelay = 1 * time.Second
+	const maxDelay = 60 * time.Second
+
+	// Try to parse the retryDelay from the error response
+	// Format: error.details[].retryDelay where @type == "type.googleapis.com/google.rpc.RetryInfo"
+	details := gjson.GetBytes(errorBody, "error.details")
+	if !details.Exists() || !details.IsArray() {
+		log.Debugf("parseRetryDelay: no error.details found, using default delay %v", defaultDelay)
+		return defaultDelay
+	}
+
+	for _, detail := range details.Array() {
+		typeVal := detail.Get("@type").String()
+		if typeVal == "type.googleapis.com/google.rpc.RetryInfo" {
+			retryDelay := detail.Get("retryDelay").String()
+			if retryDelay != "" {
+				// Parse duration string like "0.847655010s"
+				duration, err := time.ParseDuration(retryDelay)
+				if err != nil {
+					log.Debugf("parseRetryDelay: failed to parse duration %q: %v, using default", retryDelay, err)
+					return defaultDelay
+				}
+				// Cap at maxDelay to prevent excessive waits
+				if duration > maxDelay {
+					log.Debugf("parseRetryDelay: capping delay from %v to %v", duration, maxDelay)
+					return maxDelay
+				}
+				if duration < 0 {
+					log.Debugf("parseRetryDelay: negative delay %v, using default", duration)
+					return defaultDelay
+				}
+				log.Debugf("parseRetryDelay: using delay %v from API response", duration)
+				return duration
+			}
+		}
+	}
+
+	log.Debugf("parseRetryDelay: no RetryInfo found, using default delay %v", defaultDelay)
+	return defaultDelay
+}
diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
index 6f7ac724..99b50366 100644
--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go
@@ -88,6 +88,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo
 		}
 	}
 
+	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
+	// This matches the official Gemini CLI behavior which always sends:
+	// { thinkingBudget: -1, includeThoughts: true }
+	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
+	if !gjson.GetBytes(out, "request.generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
+		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
+		out, _ = sjson.SetBytes(out, "request.generationConfig.thinkingConfig.include_thoughts", true)
+	}
+
 	// Temperature/top_p/top_k
 	if tr := gjson.GetBytes(rawJSON, "temperature"); tr.Exists() && tr.Type == gjson.Number {
 		out, _ = sjson.SetBytes(out, "request.generationConfig.temperature", tr.Num)
diff --git a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
index 4eeebf3c..981fafc1 100644
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/translator/gemini/common"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	log "github.com/sirupsen/logrus"
 	"github.com/tidwall/gjson"
 	"github.com/tidwall/sjson"
 )
@@ -294,6 +295,17 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 			}
 		}
 	}
+
+	// For gemini-3-pro-preview, always send default thinkingConfig when none specified.
+	// This matches the official Gemini CLI behavior which always sends:
+	// { thinkingBudget: -1, includeThoughts: true }
+	// See: ai-gemini-cli/packages/core/src/config/defaultModelConfigs.ts
+	if !gjson.Get(out, "generationConfig.thinkingConfig").Exists() && modelName == "gemini-3-pro-preview" {
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.thinkingBudget", -1)
+		out, _ = sjson.Set(out, "generationConfig.thinkingConfig.include_thoughts", true)
+		log.Debugf("Applied default thinkingConfig for gemini-3-pro-preview (matches Gemini CLI): thinkingBudget=-1, include_thoughts=true")
+	}
+
 	result := []byte(out)
 	result = common.AttachDefaultSafetySettings(result, "safetySettings")
 	return result