**feat(registry): add Gemini 3 Pro Preview model definition**

**fix(registry): update Thinking parameters and replace Gemini-3 Preview with Gemini-2.5 Flash Lite**
2025-11-18 23:48:21 +08:00 · 2025-11-18 23:46:58 +08:00 · 2025-11-18 11:51:52 +08:00 · 2025-11-18 11:27:48 +08:00 · 2025-11-18 10:59:49 +08:00 · 2025-11-16 19:02:27 +08:00
18 changed files with 545 additions and 218 deletions
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -346,6 +346,10 @@ func (h *Handler) buildAuthFileEntry(auth *coreauth.Auth) gin.H {
 			entry["size"] = info.Size()
 			entry["modtime"] = info.ModTime()
 		} else if os.IsNotExist(err) {
+			// Hide credentials removed from disk but still lingering in memory.
+			if !runtimeOnly && (auth.Disabled || auth.Status == coreauth.StatusDisabled || strings.EqualFold(strings.TrimSpace(auth.StatusMessage), "removed via management api")) {
+				return nil
+			}
 			entry["source"] = "memory"
 		} else {
 			log.WithError(err).Warnf("failed to stat auth file %s", path)
--- a/internal/api/handlers/management/config_lists.go
+++ b/internal/api/handlers/management/config_lists.go
@@ -408,9 +408,7 @@ func (h *Handler) PutOpenAICompat(c *gin.Context) {
 		}
 		arr = obj.Items
 	}
-	for i := range arr {
-		normalizeOpenAICompatibilityEntry(&arr[i])
-	}
+	arr = migrateLegacyOpenAICompatibilityKeys(arr)
 	// Filter out providers with empty base-url -> remove provider entirely
 	filtered := make([]config.OpenAICompatibility, 0, len(arr))
 	for i := range arr {
@@ -418,7 +416,7 @@ func (h *Handler) PutOpenAICompat(c *gin.Context) {
 			filtered = append(filtered, arr[i])
 		}
 	}
-	h.cfg.OpenAICompatibility = filtered
+	h.cfg.OpenAICompatibility = migrateLegacyOpenAICompatibilityKeys(filtered)
 	h.cfg.SanitizeOpenAICompatibility()
 	h.persist(c)
 }
@@ -432,6 +430,7 @@ func (h *Handler) PatchOpenAICompat(c *gin.Context) {
 		c.JSON(400, gin.H{"error": "invalid body"})
 		return
 	}
+	h.cfg.OpenAICompatibility = migrateLegacyOpenAICompatibilityKeys(h.cfg.OpenAICompatibility)
 	normalizeOpenAICompatibilityEntry(body.Value)
 	// If base-url becomes empty, delete the provider instead of updating
 	if strings.TrimSpace(body.Value.BaseURL) == "" {
@@ -661,6 +660,16 @@ func normalizeOpenAICompatibilityEntry(entry *config.OpenAICompatibility) {
 	entry.APIKeys = nil
 }

+// migrateLegacyOpenAICompatibilityKeys runs normalizeOpenAICompatibilityEntry on each
+// element of entries. Elements are updated in place through their addresses, and the
+// slice that was passed in is what gets returned, so callers may reassign or ignore it.
+func migrateLegacyOpenAICompatibilityKeys(entries []config.OpenAICompatibility) []config.OpenAICompatibility {
+	for idx := 0; idx < len(entries); idx++ {
+		normalizeOpenAICompatibilityEntry(&entries[idx])
+	}
+	return entries
+}
+
 func normalizedOpenAICompatibilityEntries(entries []config.OpenAICompatibility) []config.OpenAICompatibility {
 	if len(entries) == 0 {
 		return nil
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -479,6 +479,7 @@ func SaveConfigPreserveComments(configFile string, cfg *Config) error {

 	// Remove deprecated auth block before merging to avoid persisting it again.
 	removeMapKey(original.Content[0], "auth")
+	removeLegacyOpenAICompatAPIKeys(original.Content[0])

 	// Merge generated into original in-place, preserving comments/order of existing nodes.
 	mergeMappingPreserve(original.Content[0], generated.Content[0])
@@ -935,6 +936,31 @@ func removeMapKey(mapNode *yaml.Node, key string) {
 	}
 }

+// removeLegacyOpenAICompatAPIKeys calls removeMapKey with "api-keys" on every mapping
+// node found in the sequence stored under the root-level "openai-compatibility" key.
+// It is a no-op when root is nil or not a mapping, when findMapKeyIndex returns a
+// negative index or no value node follows the key, or when that value is not a sequence.
+func removeLegacyOpenAICompatAPIKeys(root *yaml.Node) {
+	if root == nil || root.Kind != yaml.MappingNode {
+		return
+	}
+	keyIdx := findMapKeyIndex(root, "openai-compatibility")
+	valueIdx := keyIdx + 1
+	if keyIdx < 0 || valueIdx >= len(root.Content) {
+		return
+	}
+	providers := root.Content[valueIdx]
+	if providers == nil || providers.Kind != yaml.SequenceNode {
+		return
+	}
+	for _, provider := range providers.Content {
+		if provider == nil || provider.Kind != yaml.MappingNode {
+			continue
+		}
+		removeMapKey(provider, "api-keys")
+	}
+}
+
 // normalizeCollectionNodeStyles forces YAML collections to use block notation, keeping
 // lists and maps readable. Empty sequences retain flow style ([]) so empty list markers
 // remain compact.
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -114,7 +114,7 @@ func GeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 512, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
 	}
 }
@@ -156,20 +156,35 @@ func GetGeminiCLIModels() []*ModelInfo {
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
-			ID:                         "gemini-3-pro-preview-11-2025",
+			ID:                         "gemini-2.5-flash-lite",
 			Object:                     "model",
 			Created:                    time.Now().Unix(),
 			OwnedBy:                    "google",
 			Type:                       "gemini",
-			Name:                       "models/gemini-3-pro-preview-11-2025",
-			Version:                    "3",
-			DisplayName:                "Gemini 3 Pro Preview 11-2025",
-			Description:                "Latest preview of Gemini Pro",
+			Name:                       "models/gemini-2.5-flash-lite",
+			Version:                    "2.5",
+			DisplayName:                "Gemini 2.5 Flash Lite",
+			Description:                "Our smallest and most cost effective model, built for at scale usage.",
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:                   &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true},
 		},
+		// {
+		// 	ID:                         "gemini-3-pro-preview-11-2025",
+		// 	Object:                     "model",
+		// 	Created:                    time.Now().Unix(),
+		// 	OwnedBy:                    "google",
+		// 	Type:                       "gemini",
+		// 	Name:                       "models/gemini-3-pro-preview-11-2025",
+		// 	Version:                    "3",
+		// 	DisplayName:                "Gemini 3 Pro Preview 11-2025",
+		// 	Description:                "Latest preview of Gemini Pro",
+		// 	InputTokenLimit:            1048576,
+		// 	OutputTokenLimit:           65536,
+		// 	SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		// 	Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+		// },
 	}
 }

@@ -179,6 +194,21 @@ func GetAIStudioModels() []*ModelInfo {

 	return append(base,
 		[]*ModelInfo{
+			{
+				ID:                         "gemini-3-pro-preview",
+				Object:                     "model",
+				Created:                    time.Now().Unix(),
+				OwnedBy:                    "google",
+				Type:                       "gemini",
+				Name:                       "models/gemini-3-pro-preview",
+				Version:                    "3.0",
+				DisplayName:                "Gemini 3 Pro Preview",
+				Description:                "Gemini 3 Pro Preview",
+				InputTokenLimit:            1048576,
+				OutputTokenLimit:           65536,
+				SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+				Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+			},
 			{
 				ID:                         "gemini-pro-latest",
 				Object:                     "model",
@@ -431,13 +461,13 @@ func GetOpenAIModels() []*ModelInfo {
 			SupportedParameters: []string{"tools"},
 		},
 		{
-			ID:                  "gpt-5.1-minimal",
+			ID:                  "gpt-5.1-none",
 			Object:              "model",
 			Created:             time.Now().Unix(),
 			OwnedBy:             "openai",
 			Type:                "openai",
 			Version:             "gpt-5.1-2025-11-12",
-			DisplayName:         "GPT 5 Minimal",
+			DisplayName:         "GPT 5 None",
 			Description:         "Stable version of GPT 5, The best model for coding and agentic tasks across domains.",
 			ContextLength:       400000,
 			MaxCompletionTokens: 128000,
@@ -639,6 +669,7 @@ func GetIFlowModels() []*ModelInfo {
 		{ID: "kimi-k2-0905", DisplayName: "Kimi-K2-Instruct-0905", Description: "Moonshot Kimi K2 instruct 0905"},
 		{ID: "glm-4.6", DisplayName: "GLM-4.6", Description: "Zhipu GLM 4.6 general model"},
 		{ID: "kimi-k2", DisplayName: "Kimi-K2", Description: "Moonshot Kimi K2 general model"},
+		{ID: "kimi-k2-thinking", DisplayName: "Kimi-K2-Thinking", Description: "Moonshot Kimi K2 thinking model"},
 		{ID: "deepseek-v3.2", DisplayName: "DeepSeek-V3.2-Exp", Description: "DeepSeek V3.2 experimental"},
 		{ID: "deepseek-v3.1", DisplayName: "DeepSeek-V3.1-Terminus", Description: "DeepSeek V3.1 Terminus"},
 		{ID: "deepseek-r1", DisplayName: "DeepSeek-R1", Description: "DeepSeek reasoning model R1"},
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -219,8 +219,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 		// If from == to (Claude → Claude), directly forward the SSE stream without translation
 		if from == to {
 			scanner := bufio.NewScanner(decodedBody)
-			buf := make([]byte, 20_971_520)
-			scanner.Buffer(buf, 20_971_520)
+			scanner.Buffer(nil, 20_971_520)
 			for scanner.Scan() {
 				line := scanner.Bytes()
 				appendAPIResponseChunk(ctx, e.cfg, line)
@@ -243,8 +242,7 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A

 		// For other formats, use translation
 		scanner := bufio.NewScanner(decodedBody)
-		buf := make([]byte, 20_971_520)
-		scanner.Buffer(buf, 20_971_520)
+		scanner.Buffer(nil, 20_971_520)
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/codex_executor.go
+++ b/internal/runtime/executor/codex_executor.go
@@ -53,71 +53,8 @@ func (e *CodexExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, re
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)

-	if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, req.Model) {
-		body, _ = sjson.SetBytes(body, "model", "gpt-5")
-		switch req.Model {
-		case "gpt-5-minimal":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "minimal")
-		case "gpt-5-low":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		case "gpt-5-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, req.Model) {
-		body, _ = sjson.SetBytes(body, "model", "gpt-5-codex")
-		switch req.Model {
-		case "gpt-5-codex-low":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		case "gpt-5-codex-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5-codex-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, req.Model) {
-		body, _ = sjson.SetBytes(body, "model", "gpt-5-codex-mini")
-		switch req.Model {
-		case "gpt-5-codex-mini-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5-codex-mini-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		default:
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		}
-	} else if util.InArray([]string{"gpt-5.1", "gpt-5.1-minimal", "gpt-5.1-low", "gpt-5.1-medium", "gpt-5.1-high"}, req.Model) {
-		body, _ = sjson.SetBytes(body, "model", "gpt-5.1")
-		switch req.Model {
-		case "gpt-5.1-minimal":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "minimal")
-		case "gpt-5.1-low":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		case "gpt-5.1-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5.1-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex", "gpt-5.1-codex-low", "gpt-5.1-codex-medium", "gpt-5.1-codex-high"}, req.Model) {
-		body, _ = sjson.SetBytes(body, "model", "gpt-5.1-codex")
-		switch req.Model {
-		case "gpt-5.1-codex-low":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		case "gpt-5.1-codex-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5.1-codex-mini", "gpt-5.1-codex-mini-medium", "gpt-5.1-codex-mini-high"}, req.Model) {
-		body, _ = sjson.SetBytes(body, "model", "gpt-5.1-codex-mini")
-		switch req.Model {
-		case "gpt-5.1-codex-mini-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5.1-codex-mini-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		default:
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		}
-	}
+	body = e.setReasoningEffortByAlias(req.Model, body)
+
 	body = applyPayloadConfig(e.cfg, req.Model, body)

 	body, _ = sjson.SetBytes(body, "stream", true)
@@ -209,37 +146,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("codex")
 	body := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

-	if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, req.Model) {
-		body, _ = sjson.SetBytes(body, "model", "gpt-5")
-		switch req.Model {
-		case "gpt-5-minimal":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "minimal")
-		case "gpt-5-low":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		case "gpt-5-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, req.Model) {
-		body, _ = sjson.SetBytes(body, "model", "gpt-5-codex")
-		switch req.Model {
-		case "gpt-5-codex-low":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		case "gpt-5-codex-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5-codex-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		}
-	} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, req.Model) {
-		body, _ = sjson.SetBytes(body, "model", "gpt-5-codex-mini")
-		switch req.Model {
-		case "gpt-5-codex-mini-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5-codex-mini-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		}
-	}
+	body = e.setReasoningEffortByAlias(req.Model, body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)
 	body, _ = sjson.DeleteBytes(body, "previous_response_id")

@@ -298,8 +205,7 @@ func (e *CodexExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		buf := make([]byte, 20_971_520)
-		scanner.Buffer(buf, 20_971_520)
+		scanner.Buffer(nil, 20_971_520)
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
@@ -335,46 +241,7 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth

 	modelForCounting := req.Model

-	if util.InArray([]string{"gpt-5", "gpt-5-minimal", "gpt-5-low", "gpt-5-medium", "gpt-5-high"}, req.Model) {
-		modelForCounting = "gpt-5"
-		body, _ = sjson.SetBytes(body, "model", "gpt-5")
-		switch req.Model {
-		case "gpt-5-minimal":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "minimal")
-		case "gpt-5-low":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		case "gpt-5-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		default:
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		}
-	} else if util.InArray([]string{"gpt-5-codex", "gpt-5-codex-low", "gpt-5-codex-medium", "gpt-5-codex-high"}, req.Model) {
-		modelForCounting = "gpt-5"
-		body, _ = sjson.SetBytes(body, "model", "gpt-5-codex")
-		switch req.Model {
-		case "gpt-5-codex-low":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		case "gpt-5-codex-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5-codex-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		default:
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "low")
-		}
-	} else if util.InArray([]string{"gpt-5-codex-mini", "gpt-5-codex-mini-medium", "gpt-5-codex-mini-high"}, req.Model) {
-		modelForCounting = "gpt-5"
-		body, _ = sjson.SetBytes(body, "model", "codex-mini-latest")
-		switch req.Model {
-		case "gpt-5-codex-mini-medium":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		case "gpt-5-codex-mini-high":
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "high")
-		default:
-			body, _ = sjson.SetBytes(body, "reasoning.effort", "medium")
-		}
-	}
+	body = e.setReasoningEffortByAlias(req.Model, body)

 	body, _ = sjson.DeleteBytes(body, "previous_response_id")
 	body, _ = sjson.SetBytes(body, "stream", false)
@@ -394,6 +261,62 @@ func (e *CodexExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth
 	return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
 }

+// setReasoningEffortByAlias rewrites payload for Codex model aliases that encode a
+// reasoning effort in their name (for example "gpt-5.1-codex-high"). "model" is set to
+// the base model name and, when the alias has an effort suffix, "reasoning.effort" is
+// set to that effort. A bare base name only sets "model"; any other modelName returns
+// payload as received. Errors from sjson are ignored.
+func (e *CodexExecutor) setReasoningEffortByAlias(modelName string, payload []byte) []byte {
+	var base, effort string
+	switch modelName {
+	case "gpt-5", "gpt-5-codex", "gpt-5-codex-mini", "gpt-5.1", "gpt-5.1-codex", "gpt-5.1-codex-mini":
+		base = modelName
+	case "gpt-5-minimal":
+		base, effort = "gpt-5", "minimal"
+	case "gpt-5-low":
+		base, effort = "gpt-5", "low"
+	case "gpt-5-medium":
+		base, effort = "gpt-5", "medium"
+	case "gpt-5-high":
+		base, effort = "gpt-5", "high"
+	case "gpt-5-codex-low":
+		base, effort = "gpt-5-codex", "low"
+	case "gpt-5-codex-medium":
+		base, effort = "gpt-5-codex", "medium"
+	case "gpt-5-codex-high":
+		base, effort = "gpt-5-codex", "high"
+	case "gpt-5-codex-mini-medium":
+		base, effort = "gpt-5-codex-mini", "medium"
+	case "gpt-5-codex-mini-high":
+		base, effort = "gpt-5-codex-mini", "high"
+	case "gpt-5.1-none":
+		base, effort = "gpt-5.1", "none"
+	case "gpt-5.1-low":
+		base, effort = "gpt-5.1", "low"
+	case "gpt-5.1-medium":
+		base, effort = "gpt-5.1", "medium"
+	case "gpt-5.1-high":
+		base, effort = "gpt-5.1", "high"
+	case "gpt-5.1-codex-low":
+		base, effort = "gpt-5.1-codex", "low"
+	case "gpt-5.1-codex-medium":
+		base, effort = "gpt-5.1-codex", "medium"
+	case "gpt-5.1-codex-high":
+		base, effort = "gpt-5.1-codex", "high"
+	case "gpt-5.1-codex-mini-medium":
+		base, effort = "gpt-5.1-codex-mini", "medium"
+	case "gpt-5.1-codex-mini-high":
+		base, effort = "gpt-5.1-codex-mini", "high"
+	default:
+		return payload
+	}
+	payload, _ = sjson.SetBytes(payload, "model", base)
+	if effort != "" {
+		payload, _ = sjson.SetBytes(payload, "reasoning.effort", effort)
+	}
+	return payload
+}
+
 func tokenizerForCodexModel(model string) (tokenizer.Codec, error) {
 	sanitized := strings.ToLower(strings.TrimSpace(model))
 	switch {
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -319,8 +319,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 			}()
 			if opts.Alt == "" {
 				scanner := bufio.NewScanner(resp.Body)
-				buf := make([]byte, 20_971_520)
-				scanner.Buffer(buf, 20_971_520)
+				scanner.Buffer(nil, 20_971_520)
 				var param any
 				for scanner.Scan() {
 					line := scanner.Bytes()
--- a/internal/runtime/executor/gemini_executor.go
+++ b/internal/runtime/executor/gemini_executor.go
@@ -251,8 +251,7 @@ func (e *GeminiExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		buf := make([]byte, 20_971_520)
-		scanner.Buffer(buf, 20_971_520)
+		scanner.Buffer(nil, 20_971_520)
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/gemini_vertex_executor.go
+++ b/internal/runtime/executor/gemini_vertex_executor.go
@@ -240,8 +240,7 @@ func (e *GeminiVertexExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		buf := make([]byte, 20_971_520)
-		scanner.Buffer(buf, 20_971_520)
+		scanner.Buffer(nil, 20_971_520)
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/iflow_executor.go
+++ b/internal/runtime/executor/iflow_executor.go
@@ -199,8 +199,7 @@ func (e *IFlowExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Au
 		}()

 		scanner := bufio.NewScanner(httpResp.Body)
-		buf := make([]byte, 20_971_520)
-		scanner.Buffer(buf, 20_971_520)
+		scanner.Buffer(nil, 20_971_520)
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/openai_compat_executor.go
+++ b/internal/runtime/executor/openai_compat_executor.go
@@ -205,8 +205,7 @@ func (e *OpenAICompatExecutor) ExecuteStream(ctx context.Context, auth *cliproxy
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		buf := make([]byte, 20_971_520)
-		scanner.Buffer(buf, 20_971_520)
+		scanner.Buffer(nil, 20_971_520)
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/runtime/executor/qwen_executor.go
+++ b/internal/runtime/executor/qwen_executor.go
@@ -181,8 +181,7 @@ func (e *QwenExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Aut
 			}
 		}()
 		scanner := bufio.NewScanner(httpResp.Body)
-		buf := make([]byte, 20_971_520)
-		scanner.Buffer(buf, 20_971_520)
+		scanner.Buffer(nil, 20_971_520)
 		var param any
 		for scanner.Scan() {
 			line := scanner.Bytes()
--- a/internal/translator/codex/claude/codex_claude_request.go
+++ b/internal/translator/codex/claude/codex_claude_request.go
@@ -204,7 +204,7 @@ func ConvertClaudeRequestToCodex(modelName string, inputRawJSON []byte, _ bool)
 				}
 				tool, _ = sjson.Set(tool, "name", name)
 			}
-			tool, _ = sjson.SetRaw(tool, "parameters", toolResult.Get("input_schema").Raw)
+			tool, _ = sjson.SetRaw(tool, "parameters", normalizeToolParameters(toolResult.Get("input_schema").Raw))
 			tool, _ = sjson.Delete(tool, "input_schema")
 			tool, _ = sjson.Delete(tool, "parameters.$schema")
 			tool, _ = sjson.Set(tool, "strict", false)
@@ -334,3 +334,31 @@ func buildReverseMapFromClaudeOriginalToShort(original []byte) map[string]string
 	}
 	return m
 }
+
+// normalizeToolParameters returns a tool parameter schema that is always a JSON object.
+// Blank, invalid, or non-object input (null, arrays, scalars) yields an empty object
+// schema; a missing, null, or empty "type" is set to "object"; and a schema whose type
+// is "object" but that has no "properties" key gets an empty properties map.
+func normalizeToolParameters(raw string) string {
+	const emptyObjectSchema = `{"type":"object","properties":{}}`
+	raw = strings.TrimSpace(raw)
+	if raw == "" || !gjson.Valid(raw) {
+		return emptyObjectSchema
+	}
+	result := gjson.Parse(raw)
+	if !result.IsObject() {
+		// Valid JSON that is not an object cannot be patched key-by-key below, and the
+		// sjson errors are ignored there, so replace it with the empty schema instead.
+		return emptyObjectSchema
+	}
+	schema := raw
+	schemaType := result.Get("type").String()
+	if schemaType == "" {
+		schema, _ = sjson.Set(schema, "type", "object")
+		schemaType = "object"
+	}
+	if schemaType == "object" && !result.Get("properties").Exists() {
+		schema, _ = sjson.SetRaw(schema, "properties", `{}`)
+	}
+	return schema
+}
--- a/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
+++ b/internal/translator/gemini-cli/claude/gemini-cli_claude_request.go
@@ -128,6 +128,7 @@ func ConvertClaudeRequestToCLI(modelName string, inputRawJSON []byte, _ bool) []
 				tool, _ := sjson.Delete(toolResult.Raw, "input_schema")
 				tool, _ = sjson.SetRaw(tool, "parametersJsonSchema", inputSchema)
 				tool, _ = sjson.Delete(tool, "strict")
+				tool, _ = sjson.Delete(tool, "input_examples")
 				var toolDeclaration any
 				if err := json.Unmarshal([]byte(tool), &toolDeclaration); err == nil {
 					tools[0].FunctionDeclarations = append(tools[0].FunctionDeclarations, toolDeclaration)
--- a/internal/translator/gemini/claude/gemini_claude_request.go
+++ b/internal/translator/gemini/claude/gemini_claude_request.go
@@ -121,6 +121,7 @@ func ConvertClaudeRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
 				tool, _ := sjson.Delete(toolResult.Raw, "input_schema")
 				tool, _ = sjson.SetRaw(tool, "parametersJsonSchema", inputSchema)
 				tool, _ = sjson.Delete(tool, "strict")
+				tool, _ = sjson.Delete(tool, "input_examples")
 				var toolDeclaration any
 				if err := json.Unmarshal([]byte(tool), &toolDeclaration); err == nil {
 					tools[0].FunctionDeclarations = append(tools[0].FunctionDeclarations, toolDeclaration)
--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -156,6 +156,11 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte

 			return true
 		})
+	} else if input.Exists() && input.Type == gjson.String {
+		// Simple string input conversion to user message
+		userContent := `{"role":"user","parts":[{"text":""}]}`
+		userContent, _ = sjson.Set(userContent, "parts.0.text", input.String())
+		out, _ = sjson.SetRaw(out, "contents.-1", userContent)
 	}

 	// Convert tools to Gemini functionDeclarations format
--- a/internal/translator/openai/claude/openai_claude_response.go
+++ b/internal/translator/openai/claude/openai_claude_response.go
@@ -32,6 +32,8 @@ type ConvertOpenAIResponseToAnthropicParams struct {
 	ToolCallsAccumulator map[int]*ToolCallAccumulator
 	// Track if text content block has been started
 	TextContentBlockStarted bool
+	// Track if thinking content block has been started
+	ThinkingContentBlockStarted bool
 	// Track finish reason for later use
 	FinishReason string
 	// Track if content blocks have been stopped
@@ -40,6 +42,16 @@ type ConvertOpenAIResponseToAnthropicParams struct {
 	MessageDeltaSent bool
 	// Track if message_start has been sent
 	MessageStarted bool
+	// Track if message_stop has been sent
+	MessageStopSent bool
+	// Tool call content block index mapping
+	ToolCallBlockIndexes map[int]int
+	// Index assigned to text content block
+	TextContentBlockIndex int
+	// Index assigned to thinking content block
+	ThinkingContentBlockIndex int
+	// Next available content block index
+	NextContentBlockIndex int
 }

 // ToolCallAccumulator holds the state for accumulating tool call data
@@ -64,15 +76,20 @@ type ToolCallAccumulator struct {
 func ConvertOpenAIResponseToClaude(_ context.Context, _ string, originalRequestRawJSON, requestRawJSON, rawJSON []byte, param *any) []string {
 	if *param == nil {
 		*param = &ConvertOpenAIResponseToAnthropicParams{
-			MessageID:               "",
-			Model:                   "",
-			CreatedAt:               0,
-			ContentAccumulator:      strings.Builder{},
-			ToolCallsAccumulator:    nil,
-			TextContentBlockStarted: false,
-			FinishReason:            "",
-			ContentBlocksStopped:    false,
-			MessageDeltaSent:        false,
+			MessageID:                   "",
+			Model:                       "",
+			CreatedAt:                   0,
+			ContentAccumulator:          strings.Builder{},
+			ToolCallsAccumulator:        nil,
+			TextContentBlockStarted:     false,
+			ThinkingContentBlockStarted: false,
+			FinishReason:                "",
+			ContentBlocksStopped:        false,
+			MessageDeltaSent:            false,
+			ToolCallBlockIndexes:        make(map[int]int),
+			TextContentBlockIndex:       -1,
+			ThinkingContentBlockIndex:   -1,
+			NextContentBlockIndex:       0,
 		}
 	}

@@ -138,13 +155,56 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 			// Don't send content_block_start for text here - wait for actual content
 		}

+		// Handle reasoning content delta
+		if reasoning := delta.Get("reasoning_content"); reasoning.Exists() {
+			for _, reasoningText := range collectOpenAIReasoningTexts(reasoning) {
+				if reasoningText == "" {
+					continue
+				}
+				stopTextContentBlock(param, &results)
+				if !param.ThinkingContentBlockStarted {
+					if param.ThinkingContentBlockIndex == -1 {
+						param.ThinkingContentBlockIndex = param.NextContentBlockIndex
+						param.NextContentBlockIndex++
+					}
+					contentBlockStart := map[string]interface{}{
+						"type":  "content_block_start",
+						"index": param.ThinkingContentBlockIndex,
+						"content_block": map[string]interface{}{
+							"type":     "thinking",
+							"thinking": "",
+						},
+					}
+					contentBlockStartJSON, _ := json.Marshal(contentBlockStart)
+					results = append(results, "event: content_block_start\ndata: "+string(contentBlockStartJSON)+"\n\n")
+					param.ThinkingContentBlockStarted = true
+				}
+
+				thinkingDelta := map[string]interface{}{
+					"type":  "content_block_delta",
+					"index": param.ThinkingContentBlockIndex,
+					"delta": map[string]interface{}{
+						"type":     "thinking_delta",
+						"thinking": reasoningText,
+					},
+				}
+				thinkingDeltaJSON, _ := json.Marshal(thinkingDelta)
+				results = append(results, "event: content_block_delta\ndata: "+string(thinkingDeltaJSON)+"\n\n")
+			}
+		}
+
 		// Handle content delta
 		if content := delta.Get("content"); content.Exists() && content.String() != "" {
 			// Send content_block_start for text if not already sent
 			if !param.TextContentBlockStarted {
+				stopThinkingContentBlock(param, &results)
+				if param.TextContentBlockIndex == -1 {
+					param.TextContentBlockIndex = param.NextContentBlockIndex
+					param.NextContentBlockIndex++
+				}
 				contentBlockStart := map[string]interface{}{
 					"type":  "content_block_start",
-					"index": 0,
+					"index": param.TextContentBlockIndex,
 					"content_block": map[string]interface{}{
 						"type": "text",
 						"text": "",
@@ -157,7 +217,7 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI

 			contentDelta := map[string]interface{}{
 				"type":  "content_block_delta",
-				"index": 0,
+				"index": param.TextContentBlockIndex,
 				"delta": map[string]interface{}{
 					"type": "text_delta",
 					"text": content.String(),
@@ -178,6 +238,7 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI

 			toolCalls.ForEach(func(_, toolCall gjson.Result) bool {
 				index := int(toolCall.Get("index").Int())
+				blockIndex := param.toolContentBlockIndex(index)

 				// Initialize accumulator if needed
 				if _, exists := param.ToolCallsAccumulator[index]; !exists {
@@ -196,20 +257,14 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 					if name := function.Get("name"); name.Exists() {
 						accumulator.Name = name.String()

-						if param.TextContentBlockStarted {
-							param.TextContentBlockStarted = false
-							contentBlockStop := map[string]interface{}{
-								"type":  "content_block_stop",
-								"index": index,
-							}
-							contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
-							results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
-						}
+						stopThinkingContentBlock(param, &results)
+
+						stopTextContentBlock(param, &results)

 						// Send content_block_start for tool_use
 						contentBlockStart := map[string]interface{}{
 							"type":  "content_block_start",
-							"index": index + 1, // Offset by 1 since text is at index 0
+							"index": blockIndex,
 							"content_block": map[string]interface{}{
 								"type":  "tool_use",
 								"id":    accumulator.ID,
@@ -240,26 +295,32 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 		reason := finishReason.String()
 		param.FinishReason = reason

-		// Send content_block_stop for text if text content block was started
-		if param.TextContentBlockStarted && !param.ContentBlocksStopped {
+		// Send content_block_stop for thinking content if needed
+		if param.ThinkingContentBlockStarted {
 			contentBlockStop := map[string]interface{}{
 				"type":  "content_block_stop",
-				"index": 0,
+				"index": param.ThinkingContentBlockIndex,
 			}
 			contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
 			results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+			param.ThinkingContentBlockStarted = false
+			param.ThinkingContentBlockIndex = -1
 		}

+		// Send content_block_stop for text if text content block was started
+		stopTextContentBlock(param, &results)
+
 		// Send content_block_stop for any tool calls
 		if !param.ContentBlocksStopped {
 			for index := range param.ToolCallsAccumulator {
 				accumulator := param.ToolCallsAccumulator[index]
+				blockIndex := param.toolContentBlockIndex(index)

 				// Send complete input_json_delta with all accumulated arguments
 				if accumulator.Arguments.Len() > 0 {
 					inputDelta := map[string]interface{}{
 						"type":  "content_block_delta",
-						"index": index + 1,
+						"index": blockIndex,
 						"delta": map[string]interface{}{
 							"type":         "input_json_delta",
 							"partial_json": util.FixJSON(accumulator.Arguments.String()),
@@ -271,10 +332,11 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI

 				contentBlockStop := map[string]interface{}{
 					"type":  "content_block_stop",
-					"index": index + 1,
+					"index": blockIndex,
 				}
 				contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
 				results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+				delete(param.ToolCallBlockIndexes, index)
 			}
 			param.ContentBlocksStopped = true
 		}
@@ -284,29 +346,38 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI

 	// Handle usage information separately (this comes in a later chunk)
 	// Only process if usage has actual values (not null)
-	if usage := root.Get("usage"); usage.Exists() && usage.Type != gjson.Null && param.FinishReason != "" {
-		// Check if usage has actual token counts
-		promptTokens := usage.Get("prompt_tokens")
-		completionTokens := usage.Get("completion_tokens")
+	if param.FinishReason != "" {
+		usage := root.Get("usage")
+		var inputTokens, outputTokens int64
+		if usage.Exists() && usage.Type != gjson.Null {
+			// Check if usage has actual token counts
+			promptTokens := usage.Get("prompt_tokens")
+			completionTokens := usage.Get("completion_tokens")

-		if promptTokens.Exists() && completionTokens.Exists() {
-			// Send message_delta with usage
-			messageDelta := map[string]interface{}{
-				"type": "message_delta",
-				"delta": map[string]interface{}{
-					"stop_reason":   mapOpenAIFinishReasonToAnthropic(param.FinishReason),
-					"stop_sequence": nil,
-				},
-				"usage": map[string]interface{}{
-					"input_tokens":  promptTokens.Int(),
-					"output_tokens": completionTokens.Int(),
-				},
+			if promptTokens.Exists() && completionTokens.Exists() {
+				inputTokens = promptTokens.Int()
+				outputTokens = completionTokens.Int()
 			}
-
-			messageDeltaJSON, _ := json.Marshal(messageDelta)
-			results = append(results, "event: message_delta\ndata: "+string(messageDeltaJSON)+"\n\n")
-			param.MessageDeltaSent = true
 		}
+		// Send message_delta with usage
+		messageDelta := map[string]interface{}{
+			"type": "message_delta",
+			"delta": map[string]interface{}{
+				"stop_reason":   mapOpenAIFinishReasonToAnthropic(param.FinishReason),
+				"stop_sequence": nil,
+			},
+			"usage": map[string]interface{}{
+				"input_tokens":  inputTokens,
+				"output_tokens": outputTokens,
+			},
+		}
+
+		messageDeltaJSON, _ := json.Marshal(messageDelta)
+		results = append(results, "event: message_delta\ndata: "+string(messageDeltaJSON)+"\n\n")
+		param.MessageDeltaSent = true
+
+		emitMessageStopIfNeeded(param, &results)
+
 	}

 	return results
@@ -316,6 +387,49 @@ func convertOpenAIStreamingChunkToAnthropic(rawJSON []byte, param *ConvertOpenAI
 func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams) []string {
 	var results []string

+	// Ensure all content blocks are stopped before final events
+	if param.ThinkingContentBlockStarted {
+		contentBlockStop := map[string]interface{}{
+			"type":  "content_block_stop",
+			"index": param.ThinkingContentBlockIndex,
+		}
+		contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
+		results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+		param.ThinkingContentBlockStarted = false
+		param.ThinkingContentBlockIndex = -1
+	}
+
+	stopTextContentBlock(param, &results)
+
+	if !param.ContentBlocksStopped {
+		for index := range param.ToolCallsAccumulator {
+			accumulator := param.ToolCallsAccumulator[index]
+			blockIndex := param.toolContentBlockIndex(index)
+
+			if accumulator.Arguments.Len() > 0 {
+				inputDelta := map[string]interface{}{
+					"type":  "content_block_delta",
+					"index": blockIndex,
+					"delta": map[string]interface{}{
+						"type":         "input_json_delta",
+						"partial_json": util.FixJSON(accumulator.Arguments.String()),
+					},
+				}
+				inputDeltaJSON, _ := json.Marshal(inputDelta)
+				results = append(results, "event: content_block_delta\ndata: "+string(inputDeltaJSON)+"\n\n")
+			}
+
+			contentBlockStop := map[string]interface{}{
+				"type":  "content_block_stop",
+				"index": blockIndex,
+			}
+			contentBlockStopJSON, _ := json.Marshal(contentBlockStop)
+			results = append(results, "event: content_block_stop\ndata: "+string(contentBlockStopJSON)+"\n\n")
+			delete(param.ToolCallBlockIndexes, index)
+		}
+		param.ContentBlocksStopped = true
+	}
+
 	// If we haven't sent message_delta yet (no usage info was received), send it now
 	if param.FinishReason != "" && !param.MessageDeltaSent {
 		messageDelta := map[string]interface{}{
@@ -331,8 +445,7 @@ func convertOpenAIDoneToAnthropic(param *ConvertOpenAIResponseToAnthropicParams)
 		param.MessageDeltaSent = true
 	}

-	// Send message_stop
-	results = append(results, "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n")
+	emitMessageStopIfNeeded(param, &results)

 	return results
 }
@@ -361,6 +474,18 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string {

 	if choices := root.Get("choices"); choices.Exists() && choices.IsArray() {
 		choice := choices.Array()[0] // Take first choice
+		reasoningNode := choice.Get("message.reasoning_content")
+		allReasoning := collectOpenAIReasoningTexts(reasoningNode)
+
+		for _, reasoningText := range allReasoning {
+			if reasoningText == "" {
+				continue
+			}
+			contentBlocks = append(contentBlocks, map[string]interface{}{
+				"type":     "thinking",
+				"thinking": reasoningText,
+			})
+		}

 		// Handle text content
 		if content := choice.Get("message.content"); content.Exists() && content.String() != "" {
@@ -412,6 +537,17 @@ func convertOpenAINonStreamingToAnthropic(rawJSON []byte) []string {
 		response["usage"] = map[string]interface{}{
 			"input_tokens":  usage.Get("prompt_tokens").Int(),
 			"output_tokens": usage.Get("completion_tokens").Int(),
+			"reasoning_tokens": func() int64 {
+				if v := usage.Get("completion_tokens_details.reasoning_tokens"); v.Exists() {
+					return v.Int()
+				}
+				return 0
+			}(),
+		}
+	} else {
+		response["usage"] = map[string]interface{}{
+			"input_tokens":  0,
+			"output_tokens": 0,
 		}
 	}

@@ -437,6 +573,84 @@ func mapOpenAIFinishReasonToAnthropic(openAIReason string) string {
 	}
 }

+// toolContentBlockIndex returns the content block index assigned to the OpenAI
+// tool call identified by openAIToolIndex.
+//
+// The first lookup for a given tool index reserves the next free content block
+// index (advancing NextContentBlockIndex) and records it in
+// ToolCallBlockIndexes; later lookups return the recorded value unchanged.
+func (p *ConvertOpenAIResponseToAnthropicParams) toolContentBlockIndex(openAIToolIndex int) int {
+	if idx, ok := p.ToolCallBlockIndexes[openAIToolIndex]; ok {
+		return idx
+	}
+	// Assigning into a nil map panics, so create the map lazily for params
+	// values that were constructed without initializing it.
+	if p.ToolCallBlockIndexes == nil {
+		p.ToolCallBlockIndexes = make(map[int]int)
+	}
+	idx := p.NextContentBlockIndex
+	p.NextContentBlockIndex++
+	p.ToolCallBlockIndexes[openAIToolIndex] = idx
+	return idx
+}
+
+// collectOpenAIReasoningTexts flattens an OpenAI reasoning_content node into
+// the list of reasoning strings it contains.
+//
+// Supported shapes:
+//   - a JSON string: yielded as-is;
+//   - an object with a "text" member: the member's string value is yielded;
+//   - an array: every element is processed recursively, in order.
+//
+// Empty strings are dropped. Text is otherwise returned verbatim: the streaming
+// converter feeds incremental deltas through this function, and trimming each
+// fragment would strip the whitespace that separates consecutive fragments.
+// A missing node yields a nil slice.
+func collectOpenAIReasoningTexts(node gjson.Result) []string {
+	var texts []string
+	if !node.Exists() {
+		return texts
+	}
+
+	if node.IsArray() {
+		node.ForEach(func(_, value gjson.Result) bool {
+			texts = append(texts, collectOpenAIReasoningTexts(value)...)
+			return true
+		})
+		return texts
+	}
+
+	switch node.Type {
+	case gjson.String:
+		// Keep leading/trailing whitespace; only skip truly empty values.
+		if text := node.String(); text != "" {
+			texts = append(texts, text)
+		}
+	case gjson.JSON:
+		if text := node.Get("text"); text.Exists() {
+			if value := text.String(); value != "" {
+				texts = append(texts, value)
+			}
+		} else if raw := strings.TrimSpace(node.Raw); raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
+			// Fallback for raw payloads that are neither objects nor arrays.
+			texts = append(texts, raw)
+		}
+	}
+
+	return texts
+}
+
+// stopThinkingContentBlock closes the currently open thinking content block,
+// if there is one.
+//
+// When param.ThinkingContentBlockStarted is set, a content_block_stop SSE event
+// carrying param.ThinkingContentBlockIndex is appended to *results, the started
+// flag is cleared and the index is reset to -1. Otherwise nothing happens.
+func stopThinkingContentBlock(param *ConvertOpenAIResponseToAnthropicParams, results *[]string) {
+	if param.ThinkingContentBlockStarted {
+		// Marshal error intentionally discarded, matching the other event
+		// builders in this converter.
+		stopEventJSON, _ := json.Marshal(map[string]interface{}{
+			"type":  "content_block_stop",
+			"index": param.ThinkingContentBlockIndex,
+		})
+		*results = append(*results, "event: content_block_stop\ndata: "+string(stopEventJSON)+"\n\n")
+		param.ThinkingContentBlockStarted = false
+		param.ThinkingContentBlockIndex = -1
+	}
+}
+
+// emitMessageStopIfNeeded appends the message_stop SSE event to *results the
+// first time it is called for a given param and records that fact in
+// param.MessageStopSent; subsequent calls are no-ops.
+func emitMessageStopIfNeeded(param *ConvertOpenAIResponseToAnthropicParams, results *[]string) {
+	if !param.MessageStopSent {
+		param.MessageStopSent = true
+		*results = append(*results, "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n")
+	}
+}
+
+// stopTextContentBlock closes the currently open text content block, if there
+// is one.
+//
+// When param.TextContentBlockStarted is set, a content_block_stop SSE event
+// carrying param.TextContentBlockIndex is appended to *results, the started
+// flag is cleared and the index is reset to -1. Otherwise nothing happens.
+func stopTextContentBlock(param *ConvertOpenAIResponseToAnthropicParams, results *[]string) {
+	if param.TextContentBlockStarted {
+		// Marshal error intentionally discarded, matching the other event
+		// builders in this converter.
+		stopEventJSON, _ := json.Marshal(map[string]interface{}{
+			"type":  "content_block_stop",
+			"index": param.TextContentBlockIndex,
+		})
+		*results = append(*results, "event: content_block_stop\ndata: "+string(stopEventJSON)+"\n\n")
+		param.TextContentBlockStarted = false
+		param.TextContentBlockIndex = -1
+	}
+}
+
 // ConvertOpenAIResponseToClaudeNonStream converts a non-streaming OpenAI response to a non-streaming Anthropic response.
 //
 // Parameters:
@@ -564,6 +778,18 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 				}
 			}

+			if reasoning := message.Get("reasoning_content"); reasoning.Exists() {
+				for _, reasoningText := range collectOpenAIReasoningTexts(reasoning) {
+					if reasoningText == "" {
+						continue
+					}
+					contentBlocks = append(contentBlocks, map[string]interface{}{
+						"type":     "thinking",
+						"thinking": reasoningText,
+					})
+				}
+			}
+
 			if toolCalls := message.Get("tool_calls"); toolCalls.Exists() && toolCalls.IsArray() {
 				toolCalls.ForEach(func(_, toolCall gjson.Result) bool {
 					hasToolCall = true
@@ -601,6 +827,8 @@ func ConvertOpenAIResponseToClaudeNonStream(_ context.Context, _ string, origina
 		usageJSON, _ = sjson.Set(usageJSON, "output_tokens", respUsage.Get("completion_tokens").Int())
 		parsedUsage := gjson.Parse(usageJSON).Value().(map[string]interface{})
 		response["usage"] = parsedUsage
+	} else {
+		response["usage"] = `{"input_tokens":0,"output_tokens":0}`
 	}

 	if response["stop_reason"] == nil {
--- a/internal/translator/openai/gemini/openai_gemini_response.go
+++ b/internal/translator/openai/gemini/openai_gemini_response.go
@@ -89,6 +89,9 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR
 					"candidatesTokenCount": usage.Get("completion_tokens").Int(),
 					"totalTokenCount":      usage.Get("total_tokens").Int(),
 				}
+				if reasoningTokens := reasoningTokensFromUsage(usage); reasoningTokens > 0 {
+					usageObj["thoughtsTokenCount"] = reasoningTokens
+				}
 				template, _ = sjson.Set(template, "usageMetadata", usageObj)
 				return []string{template}
 			}
@@ -108,6 +111,7 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR

 			_ = int(choice.Get("index").Int()) // choiceIdx not used in streaming
 			delta := choice.Get("delta")
+			baseTemplate := template

 			// Handle role (only in first chunk)
 			if role := delta.Get("role"); role.Exists() && (*param).(*ConvertOpenAIResponseToGeminiParams).IsFirstChunk {
@@ -120,6 +124,26 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR
 				return true
 			}

+			var chunkOutputs []string
+
+			// Handle reasoning/thinking delta
+			if reasoning := delta.Get("reasoning_content"); reasoning.Exists() {
+				for _, reasoningText := range extractReasoningTexts(reasoning) {
+					if reasoningText == "" {
+						continue
+					}
+					reasoningTemplate := baseTemplate
+					parts := []interface{}{
+						map[string]interface{}{
+							"thought": true,
+							"text":    reasoningText,
+						},
+					}
+					reasoningTemplate, _ = sjson.Set(reasoningTemplate, "candidates.0.content.parts", parts)
+					chunkOutputs = append(chunkOutputs, reasoningTemplate)
+				}
+			}
+
 			// Handle content delta
 			if content := delta.Get("content"); content.Exists() && content.String() != "" {
 				contentText := content.String()
@@ -131,8 +155,13 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR
 						"text": contentText,
 					},
 				}
-				template, _ = sjson.Set(template, "candidates.0.content.parts", parts)
-				results = append(results, template)
+				contentTemplate := baseTemplate
+				contentTemplate, _ = sjson.Set(contentTemplate, "candidates.0.content.parts", parts)
+				chunkOutputs = append(chunkOutputs, contentTemplate)
+			}
+
+			if len(chunkOutputs) > 0 {
+				results = append(results, chunkOutputs...)
 				return true
 			}

@@ -231,6 +260,9 @@ func ConvertOpenAIResponseToGemini(_ context.Context, _ string, originalRequestR
 					"candidatesTokenCount": usage.Get("completion_tokens").Int(),
 					"totalTokenCount":      usage.Get("total_tokens").Int(),
 				}
+				if reasoningTokens := reasoningTokensFromUsage(usage); reasoningTokens > 0 {
+					usageObj["thoughtsTokenCount"] = reasoningTokens
+				}
 				template, _ = sjson.Set(template, "usageMetadata", usageObj)
 				results = append(results, template)
 				return true
@@ -549,6 +581,19 @@ func ConvertOpenAIResponseToGeminiNonStream(_ context.Context, _ string, origina

 			var parts []interface{}

+			// Handle reasoning content before visible text
+			if reasoning := message.Get("reasoning_content"); reasoning.Exists() {
+				for _, reasoningText := range extractReasoningTexts(reasoning) {
+					if reasoningText == "" {
+						continue
+					}
+					parts = append(parts, map[string]interface{}{
+						"thought": true,
+						"text":    reasoningText,
+					})
+				}
+			}
+
 			// Handle content first
 			if content := message.Get("content"); content.Exists() && content.String() != "" {
 				parts = append(parts, map[string]interface{}{
@@ -605,6 +650,9 @@ func ConvertOpenAIResponseToGeminiNonStream(_ context.Context, _ string, origina
 			"candidatesTokenCount": usage.Get("completion_tokens").Int(),
 			"totalTokenCount":      usage.Get("total_tokens").Int(),
 		}
+		if reasoningTokens := reasoningTokensFromUsage(usage); reasoningTokens > 0 {
+			usageObj["thoughtsTokenCount"] = reasoningTokens
+		}
 		out, _ = sjson.Set(out, "usageMetadata", usageObj)
 	}

@@ -614,3 +662,43 @@ func ConvertOpenAIResponseToGeminiNonStream(_ context.Context, _ string, origina
 func GeminiTokenCount(ctx context.Context, count int64) string {
 	// The same count is reported both as the total and as the single TEXT
 	// modality entry; ctx is accepted but not used.
 	const tokenCountTemplate = `{"totalTokens":%[1]d,"promptTokensDetails":[{"modality":"TEXT","tokenCount":%[1]d}]}`
 	return fmt.Sprintf(tokenCountTemplate, count)
 }
+
+// reasoningTokensFromUsage reads the reasoning token count from an OpenAI usage
+// object.
+//
+// It looks at completion_tokens_details.reasoning_tokens first and then at
+// output_tokens_details.reasoning_tokens, returning the first one present.
+// The result is 0 when usage is missing or neither field exists.
+func reasoningTokensFromUsage(usage gjson.Result) int64 {
+	if !usage.Exists() {
+		return 0
+	}
+	candidatePaths := [...]string{
+		"completion_tokens_details.reasoning_tokens",
+		"output_tokens_details.reasoning_tokens",
+	}
+	for _, path := range candidatePaths {
+		if tokens := usage.Get(path); tokens.Exists() {
+			return tokens.Int()
+		}
+	}
+	return 0
+}
+
+// extractReasoningTexts flattens a reasoning_content node into the strings it
+// contains, without trimming or filtering them.
+//
+// A JSON string yields its value; an object with a "text" member yields that
+// member's string value; an array is walked recursively in element order.
+// A missing node yields a nil slice. Callers are expected to skip empty
+// entries themselves.
+func extractReasoningTexts(node gjson.Result) []string {
+	var collected []string
+
+	switch {
+	case !node.Exists():
+		// Nothing to extract.
+	case node.IsArray():
+		for _, element := range node.Array() {
+			collected = append(collected, extractReasoningTexts(element)...)
+		}
+	case node.Type == gjson.String:
+		collected = append(collected, node.String())
+	case node.Type == gjson.JSON:
+		textField := node.Get("text")
+		if textField.Exists() {
+			collected = append(collected, textField.String())
+			break
+		}
+		// Fallback for raw payloads that are neither objects nor arrays.
+		raw := strings.TrimSpace(node.Raw)
+		if raw != "" && !strings.HasPrefix(raw, "{") && !strings.HasPrefix(raw, "[") {
+			collected = append(collected, raw)
+		}
+	}
+
+	return collected
+}
Author	SHA1	Message	Date
Luis Pater	17016ae6a5	feat(registry): add Gemini 3 Pro Preview model definition Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details	2025-11-18 23:48:21 +08:00
Luis Pater	01b7b60901	feat(registry): add Gemini 3 Pro Preview model definition	2025-11-18 23:46:58 +08:00
Luis Pater	23a7633e6d	fix(registry): update Thinking parameters and replace Gemini-3 Preview with Gemini-2.5 Flash Lite Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details	2025-11-18 11:51:52 +08:00
Luis Pater	e5e985978d	Fixed: #263 Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details fix(translator): remove input_examples from tool schema in Gemini-Claude requests	2025-11-18 11:27:48 +08:00
Luis Pater	db2d22c978	fix(runtime): simplify scanner buffer allocation in executor implementations	2025-11-18 10:59:49 +08:00
Luis Pater	1c815c58a6	fix(translator): simplify string handling in Gemini responses Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details	2025-11-16 19:02:27 +08:00
Luis Pater	4eab141410	feat(translator): add support for reasoning/thinking content blocks in OpenAI-Claude and Gemini responses Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details	2025-11-16 17:37:39 +08:00
Luis Pater	5937b8e429	Fixed: #260 Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details fix(translator): handle simple string input conversion in Gemini responses	2025-11-16 13:30:11 +08:00
Luis Pater	9875565339	fix(claude translator): ensure default token counts when usage data is missing Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details	2025-11-16 13:18:21 +08:00
Luis Pater	faa483b57d	Merge pull request #257 from lollipopkit/main Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details fix(claude translator): guard tool schema properties	2025-11-16 12:19:38 +08:00
Luis Pater	f0711be302	fix(auth): prevent access to removed credentials lingering in memory Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details Add logic to avoid exposing credentials that have been removed from disk but still persist in memory. Ensure `runtimeOnly` checks and proper handling of disabled or removed authentication states.	2025-11-16 12:12:24 +08:00
Luis Pater	1d0f0301b4	refactor(api/config): centralize legacy OpenAI compatibility key migration Introduce `migrateLegacyOpenAICompatibilityKeys` to streamline and reuse the normalization of OpenAI compatibility entries. Remove redundant loops and enhance maintainability for compatibility key handling. Add cleanup for legacy `api-keys` in YAML configuration during persistence.	2025-11-16 11:39:35 +08:00
lollipopkit🏳️‍⚧️	c73b3fa43b	fix(claude translator): guard tool schema properties	2025-11-15 19:14:13 +08:00
Luis Pater	772fa69515	Fixed: #254 Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details feat(registry): add Kimi-K2-Thinking model to model definitions	2025-11-14 21:20:54 +08:00
Luis Pater	1ccb01631d	refactor(runtime): centralize reasoning effort logic for GPT models Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details Extract reasoning effort mapping into a reusable function `setReasoningEffortByAlias` to reduce redundancy and improve maintainability. Introduce support for the "gpt-5.1-none" variant in the registry and runtime executor.	2025-11-14 17:24:40 +08:00
Luis Pater	1ede1347fa	Merge pull request #249 from ben-vargas/fix-gpt5-1-reasoning fix(runtime): remove gpt-5.1 minimal effort variant	2025-11-14 17:04:27 +08:00
Ben Vargas	cfbaed0e90	fix(runtime): remove gpt-5.1 minimal effort variant Stop advertising and mapping the unsupported gpt-5.1-minimal variant in the model registry and Codex executor, and align bare gpt-5.1 requests to use medium reasoning effort like Codex CLI while preserving minimal for gpt-5.	2025-11-13 19:43:52 -07:00
Luis Pater	cf9b9be7ea	feat(runtime): extend executor support for GPT-5.1 Codex and variants Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details Expand executor logic to handle GPT-5.1 Codex family and its variants, including reasoning effort configurations for minimal, low, medium, and high levels. Ensure proper mapping of models to payload parameters.	2025-11-14 08:08:25 +08:00