feat(registry): add GetAvailableModelsByProvider method for retrieving models by provider

Merge pull request #814 from router-for-me/aistudio
Fix model alias thinking suffix
2025-12-31 23:37:46 +08:00 · 2025-12-31 03:08:05 -08:00 · 2025-12-31 18:07:13 +08:00 · 2025-12-31 17:14:47 +08:00 · 2025-12-31 17:09:22 +08:00 · 2025-12-31 17:09:22 +08:00
12 changed files with 512 additions and 51 deletions
--- a/internal/managementasset/updater.go
+++ b/internal/managementasset/updater.go
@@ -24,10 +24,11 @@ import (
 )
 const (
-	defaultManagementReleaseURL = "https://api.github.com/repos/router-for-me/Cli-Proxy-API-Management-Center/releases/latest"
+	defaultManagementReleaseURL  = "https://api.github.com/repos/router-for-me/Cli-Proxy-API-Management-Center/releases/latest"
-	managementAssetName         = "management.html"
+	defaultManagementFallbackURL = "https://cpamc.router-for.me/"
-	httpUserAgent               = "CLIProxyAPI-management-updater"
+	managementAssetName          = "management.html"
-	updateCheckInterval         = 3 * time.Hour
+	httpUserAgent                = "CLIProxyAPI-management-updater"
 	updateCheckInterval          = 3 * time.Hour
 )
 // ManagementFileName exposes the control panel asset filename.
@@ -198,6 +199,16 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
 		return
 	}
 	localPath := filepath.Join(staticDir, managementAssetName)
 	localFileMissing := false
 	if _, errStat := os.Stat(localPath); errStat != nil {
 		if errors.Is(errStat, os.ErrNotExist) {
 			localFileMissing = true
 		} else {
 			log.WithError(errStat).Debug("failed to stat local management asset")
 		}
 	}
 	// Rate limiting: check only once every 3 hours
 	lastUpdateCheckMu.Lock()
 	now := time.Now()
@@ -210,15 +221,14 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
 	lastUpdateCheckTime = now
 	lastUpdateCheckMu.Unlock()
-	if err := os.MkdirAll(staticDir, 0o755); err != nil {
+	if errMkdirAll := os.MkdirAll(staticDir, 0o755); errMkdirAll != nil {
-		log.WithError(err).Warn("failed to prepare static directory for management asset")
+		log.WithError(errMkdirAll).Warn("failed to prepare static directory for management asset")
 		return
 	}
 	releaseURL := resolveReleaseURL(panelRepository)
 	client := newHTTPClient(proxyURL)
 	localPath := filepath.Join(staticDir, managementAssetName)
 	localHash, err := fileSHA256(localPath)
 	if err != nil {
 		if !errors.Is(err, os.ErrNotExist) {
@@ -229,6 +239,13 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
 	asset, remoteHash, err := fetchLatestAsset(ctx, client, releaseURL)
 	if err != nil {
 		if localFileMissing {
 			log.WithError(err).Warn("failed to fetch latest management release information, trying fallback page")
 			if ensureFallbackManagementHTML(ctx, client, localPath) {
 				return
 			}
 			return
 		}
 		log.WithError(err).Warn("failed to fetch latest management release information")
 		return
 	}
@@ -240,6 +257,13 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
 	data, downloadedHash, err := downloadAsset(ctx, client, asset.BrowserDownloadURL)
 	if err != nil {
 		if localFileMissing {
 			log.WithError(err).Warn("failed to download management asset, trying fallback page")
 			if ensureFallbackManagementHTML(ctx, client, localPath) {
 				return
 			}
 			return
 		}
 		log.WithError(err).Warn("failed to download management asset")
 		return
 	}
@@ -256,6 +280,22 @@ func EnsureLatestManagementHTML(ctx context.Context, staticDir string, proxyURL
 	log.Infof("management asset updated successfully (hash=%s)", downloadedHash)
 }
 // ensureFallbackManagementHTML fetches the page at defaultManagementFallbackURL with the
 // supplied client and stores it at localPath through atomicWriteFile.
 // Parameters:
 //   - ctx: context passed to the download
 //   - client: HTTP client used for the download
 //   - localPath: destination path of the management asset
 //
 // Returns:
 //   - bool: true when the page was downloaded and written; false (after a warning log) otherwise
 func ensureFallbackManagementHTML(ctx context.Context, client *http.Client, localPath string) bool {
 	page, pageHash, errDownload := downloadAsset(ctx, client, defaultManagementFallbackURL)
 	if errDownload != nil {
 		log.WithError(errDownload).Warn("failed to download fallback management control panel page")
 		return false
 	}
 	if errWrite := atomicWriteFile(localPath, page); errWrite != nil {
 		log.WithError(errWrite).Warn("failed to persist fallback management control panel page")
 		return false
 	}
 	log.Infof("management asset updated from fallback page successfully (hash=%s)", pageHash)
 	return true
 }
 func resolveReleaseURL(repo string) string {
 	repo = strings.TrimSpace(repo)
 	if repo == "" {
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -773,7 +773,7 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 	return map[string]*AntigravityModelConfig{
 		"gemini-2.5-flash":                        {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
 		"gemini-2.5-flash-lite":                   {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
-		"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
+		"gemini-2.5-computer-use-preview-10-2025": {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-2.5-computer-use-preview-10-2025"},
 		"gemini-3-pro-preview":                    {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
 		"gemini-3-pro-image-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
 		"gemini-3-flash-preview":                  {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -625,6 +625,131 @@ func (r *ModelRegistry) GetAvailableModels(handlerType string) []map[string]any
 	return models
 }
 // GetAvailableModelsByProvider lists the models that clients of one provider currently offer.
 // The provider identifier is trimmed and lower-cased before it is compared with the
 // registered client providers. A model is reported when at least one of the provider's
 // clients is left after subtracting clients with a recent quota-exceeded mark (younger than
 // five minutes) and clients suspended for a non-quota reason, or when the only blockers are
 // quota related. The order of the returned slice follows map iteration and is unspecified.
 // Parameters:
 //   - provider: Provider identifier (e.g., "codex", "gemini", "antigravity")
 //
 // Returns:
 //   - []*ModelInfo: Models available for the provider; nil when the identifier is blank
 //     or no client of that provider has registered models
 func (r *ModelRegistry) GetAvailableModelsByProvider(provider string) []*ModelInfo {
 	provider = strings.ToLower(strings.TrimSpace(provider))
 	if provider == "" {
 		return nil
 	}
 	r.mutex.RLock()
 	defer r.mutex.RUnlock()
 	// tally records how many of the provider's clients list a model and the first metadata found for it.
 	type tally struct {
 		clients int
 		info    *ModelInfo
 	}
 	tallies := make(map[string]*tally)
 	for id, owner := range r.clientProviders {
 		if owner != provider {
 			continue
 		}
 		perClientInfos := r.clientModelInfos[id]
 		for _, rawID := range r.clientModels[id] {
 			modelID := strings.TrimSpace(rawID)
 			if modelID == "" {
 				continue
 			}
 			t, seen := tallies[modelID]
 			if !seen {
 				t = &tally{}
 				tallies[modelID] = t
 			}
 			t.clients++
 			if t.info != nil {
 				continue
 			}
 			// Prefer the client-specific metadata, then fall back to the shared registration.
 			if info := perClientInfos[modelID]; info != nil {
 				t.info = info
 				continue
 			}
 			if reg := r.models[modelID]; reg != nil && reg.Info != nil {
 				t.info = reg.Info
 			}
 		}
 	}
 	if len(tallies) == 0 {
 		return nil
 	}
 	const quotaWindow = 5 * time.Minute
 	now := time.Now()
 	// belongsToProvider reports whether a non-empty client ID is registered under the requested provider.
 	belongsToProvider := func(clientID string) bool {
 		if clientID == "" {
 			return false
 		}
 		owner, known := r.clientProviders[clientID]
 		return known && owner == provider
 	}
 	available := make([]*ModelInfo, 0, len(tallies))
 	for modelID, t := range tallies {
 		if t == nil || t.clients <= 0 {
 			continue
 		}
 		reg := r.models[modelID]
 		var quotaBlocked, quotaSuspended, otherSuspended int
 		if reg != nil {
 			for clientID, exceededAt := range reg.QuotaExceededClients {
 				if !belongsToProvider(clientID) {
 					continue
 				}
 				// Only quota marks younger than the window still block the client.
 				if exceededAt != nil && now.Sub(*exceededAt) < quotaWindow {
 					quotaBlocked++
 				}
 			}
 			for clientID, reason := range reg.SuspendedClients {
 				if !belongsToProvider(clientID) {
 					continue
 				}
 				if strings.EqualFold(reason, "quota") {
 					quotaSuspended++
 				} else {
 					otherSuspended++
 				}
 			}
 		}
 		usable := t.clients - quotaBlocked - otherSuspended
 		if usable < 0 {
 			usable = 0
 		}
 		// A model blocked solely by quota cooldowns is still listed.
 		quotaOnly := t.clients > 0 && otherSuspended == 0 && (quotaBlocked > 0 || quotaSuspended > 0)
 		if usable <= 0 && !quotaOnly {
 			continue
 		}
 		switch {
 		case t.info != nil:
 			available = append(available, t.info)
 		case reg != nil && reg.Info != nil:
 			available = append(available, reg.Info)
 		}
 	}
 	return available
 }
 // GetModelCount returns the number of available clients for a specific model
 // Parameters:
 //   - modelID: The model ID to check
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -96,9 +96,9 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	to := sdktranslator.FromString("antigravity")
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
@@ -191,9 +191,9 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	to := sdktranslator.FromString("antigravity")
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, true)
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
@@ -527,9 +527,9 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	to := sdktranslator.FromString("antigravity")
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = ApplyThinkingMetadataCLI(translated, req.Metadata, req.Model)
 	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
-	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
+	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated, isClaude)
 	translated = applyPayloadConfigWithRoot(e.cfg, req.Model, "antigravity", "request", translated)
@@ -697,8 +697,8 @@ func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyaut
 	for idx, baseURL := range baseURLs {
 		payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-		payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
+		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
-		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload)
+		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, payload)
 		payload = normalizeAntigravityThinking(req.Model, payload, isClaude)
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -78,9 +78,9 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
-	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
 	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
@@ -217,9 +217,9 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	from := opts.SourceFormat
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
-	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = ApplyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
 	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
-	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
+	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, req.Metadata, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
 	basePayload = fixGeminiCLIImageAspectRatio(req.Model, basePayload)
@@ -421,7 +421,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	// Gemini CLI endpoint when iterating fallback variants.
 	for _, attemptModel := range models {
 		payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
-		payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
+		payload = ApplyThinkingMetadataCLI(payload, req.Metadata, req.Model)
 		payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
--- a/internal/runtime/executor/payload_helpers.go
+++ b/internal/runtime/executor/payload_helpers.go
@@ -14,32 +14,54 @@ import (
 // ApplyThinkingMetadata applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for standard Gemini format payloads. It normalizes the budget when the model supports thinking.
 func ApplyThinkingMetadata(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	// Use the alias from metadata if available, as it's registered in the global registry
 	// with thinking metadata; the upstream model name may not be registered.
 	lookupModel := util.ResolveOriginalModel(model, metadata)
 	// Determine which model to use for thinking support check.
 	// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
 	thinkingModel := lookupModel
 	if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
 		thinkingModel = model
 	}
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
 	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
-	if !util.ModelSupportsThinking(model) {
+	if !util.ModelSupportsThinking(thinkingModel) {
 		return payload
 	}
 	if budgetOverride != nil {
-		norm := util.NormalizeThinkingBudget(model, *budgetOverride)
+		norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
 		budgetOverride = &norm
 	}
 	return util.ApplyGeminiThinkingConfig(payload, budgetOverride, includeOverride)
 }
-// applyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
+// ApplyThinkingMetadataCLI applies thinking config from model suffix metadata (e.g., (high), (8192))
 // for Gemini CLI format payloads (nested under "request"). It normalizes the budget when the model supports thinking.
-func applyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
+func ApplyThinkingMetadataCLI(payload []byte, metadata map[string]any, model string) []byte {
-	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(model, metadata)
+	// Use the alias from metadata if available, as it's registered in the global registry
 	// with thinking metadata; the upstream model name may not be registered.
 	lookupModel := util.ResolveOriginalModel(model, metadata)
 	// Determine which model to use for thinking support check.
 	// If the alias (lookupModel) is not in the registry, fall back to the upstream model.
 	thinkingModel := lookupModel
 	if !util.ModelSupportsThinking(lookupModel) && util.ModelSupportsThinking(model) {
 		thinkingModel = model
 	}
 	budgetOverride, includeOverride, ok := util.ResolveThinkingConfigFromMetadata(thinkingModel, metadata)
 	if !ok || (budgetOverride == nil && includeOverride == nil) {
 		return payload
 	}
-	if !util.ModelSupportsThinking(model) {
+	if !util.ModelSupportsThinking(thinkingModel) {
 		return payload
 	}
 	if budgetOverride != nil {
-		norm := util.NormalizeThinkingBudget(model, *budgetOverride)
+		norm := util.NormalizeThinkingBudget(thinkingModel, *budgetOverride)
 		budgetOverride = &norm
 	}
 	return util.ApplyGeminiCLIThinkingConfig(payload, budgetOverride, includeOverride)
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -288,37 +288,73 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
 // ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models.
 // For standard Gemini API format (generationConfig.thinkingConfig path).
-// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
+// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
 // or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
 func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte {
-	if !IsGemini3Model(model) {
+	// Use the alias from metadata if available for model type detection
 	lookupModel := ResolveOriginalModel(model, metadata)
 	if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
 		return body
 	}
 	// Determine which model to use for validation
 	checkModel := model
 	if IsGemini3Model(lookupModel) {
 		checkModel = lookupModel
 	}
 	// First try to get effort string from metadata
 	effort, ok := ReasoningEffortFromMetadata(metadata)
-	if !ok || effort == "" {
+	if ok && effort != "" {
-		return body
+		if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
 			return ApplyGeminiThinkingLevel(body, level, nil)
 		}
 	}
-	// Validate and apply the thinkingLevel
+
-	if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
+	// Fallback: check for numeric budget and convert to thinkingLevel
-		return ApplyGeminiThinkingLevel(body, level, nil)
+	budget, _, _, matched := ThinkingFromMetadata(metadata)
 	if matched && budget != nil {
 		if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
 			return ApplyGeminiThinkingLevel(body, level, nil)
 		}
 	}
 	return body
 }
 // ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models.
 // For Gemini CLI API format (request.generationConfig.thinkingConfig path).
-// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
+// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal))
 // or numeric budget suffix (e.g., model(1000)) which gets converted to a thinkingLevel.
 func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte {
-	if !IsGemini3Model(model) {
+	// Use the alias from metadata if available for model type detection
 	lookupModel := ResolveOriginalModel(model, metadata)
 	if !IsGemini3Model(lookupModel) && !IsGemini3Model(model) {
 		return body
 	}
 	// Determine which model to use for validation
 	checkModel := model
 	if IsGemini3Model(lookupModel) {
 		checkModel = lookupModel
 	}
 	// First try to get effort string from metadata
 	effort, ok := ReasoningEffortFromMetadata(metadata)
-	if !ok || effort == "" {
+	if ok && effort != "" {
-		return body
+		if level, valid := ValidateGemini3ThinkingLevel(checkModel, effort); valid {
 			return ApplyGeminiCLIThinkingLevel(body, level, nil)
 		}
 	}
-	// Validate and apply the thinkingLevel
+
-	if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
+	// Fallback: check for numeric budget and convert to thinkingLevel
-		return ApplyGeminiCLIThinkingLevel(body, level, nil)
+	budget, _, _, matched := ThinkingFromMetadata(metadata)
 	if matched && budget != nil {
 		if level, valid := ThinkingBudgetToGemini3Level(checkModel, *budget); valid {
 			return ApplyGeminiCLIThinkingLevel(body, level, nil)
 		}
 	}
 	return body
 }
@@ -326,15 +362,17 @@ func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]
 // For Gemini CLI API format (request.generationConfig.thinkingConfig path).
 // Returns the modified body if thinkingConfig was added, otherwise returns the original.
 // For Gemini 3 models, uses thinkingLevel instead of thinkingBudget per Google's documentation.
-func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
+func ApplyDefaultThinkingIfNeededCLI(model string, metadata map[string]any, body []byte) []byte {
-	if !ModelHasDefaultThinking(model) {
+	// Use the alias from metadata if available for model property lookup
 	lookupModel := ResolveOriginalModel(model, metadata)
 	if !ModelHasDefaultThinking(lookupModel) && !ModelHasDefaultThinking(model) {
 		return body
 	}
 	if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
 		return body
 	}
 	// Gemini 3 models use thinkingLevel instead of thinkingBudget
-	if IsGemini3Model(model) {
+	if IsGemini3Model(lookupModel) || IsGemini3Model(model) {
 		// Don't set a default - let the API use its dynamic default ("high")
 		// Only set includeThoughts
 		updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true)
--- a/internal/util/thinking.go
+++ b/internal/util/thinking.go
@@ -12,9 +12,18 @@ func ModelSupportsThinking(model string) bool {
 	if model == "" {
 		return false
 	}
 	// First check the global dynamic registry
 	if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil {
 		return info.Thinking != nil
 	}
 	// Fallback: check static model definitions
 	if info := registry.LookupStaticModelInfo(model); info != nil {
 		return info.Thinking != nil
 	}
 	// Fallback: check Antigravity static config
 	if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil {
 		return cfg.Thinking != nil
 	}
 	return false
 }
@@ -63,11 +72,19 @@ func thinkingRangeFromRegistry(model string) (found bool, min int, max int, zero
 	if model == "" {
 		return false, 0, 0, false, false
 	}
-	info := registry.GetGlobalRegistry().GetModelInfo(model)
+	// First check global dynamic registry
-	if info == nil || info.Thinking == nil {
+	if info := registry.GetGlobalRegistry().GetModelInfo(model); info != nil && info.Thinking != nil {
-		return false, 0, 0, false, false
+		return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
 	}
-	return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
+	// Fallback: check static model definitions
 	if info := registry.LookupStaticModelInfo(model); info != nil && info.Thinking != nil {
 		return true, info.Thinking.Min, info.Thinking.Max, info.Thinking.ZeroAllowed, info.Thinking.DynamicAllowed
 	}
 	// Fallback: check Antigravity static config
 	if cfg := registry.GetAntigravityModelConfig()[model]; cfg != nil && cfg.Thinking != nil {
 		return true, cfg.Thinking.Min, cfg.Thinking.Max, cfg.Thinking.ZeroAllowed, cfg.Thinking.DynamicAllowed
 	}
 	return false, 0, 0, false, false
 }
 // GetModelThinkingLevels returns the discrete reasoning effort levels for the model.
--- a/sdk/api/management.go
+++ b/sdk/api/management.go
@@ -20,6 +20,7 @@ type ManagementTokenRequester interface {
 	RequestQwenToken(*gin.Context)
 	RequestIFlowToken(*gin.Context)
 	RequestIFlowCookieToken(*gin.Context)
 	GetAuthStatus(c *gin.Context)
 }
 type managementTokenRequester struct {
@@ -60,3 +61,7 @@ func (m *managementTokenRequester) RequestIFlowToken(c *gin.Context) {
 func (m *managementTokenRequester) RequestIFlowCookieToken(c *gin.Context) {
 	m.handler.RequestIFlowCookieToken(c)
 }
 // GetAuthStatus forwards the request context to the wrapped handler's GetAuthStatus.
 func (m *managementTokenRequester) GetAuthStatus(c *gin.Context) {
 	m.handler.GetAuthStatus(c)
 }
--- a/sdk/cliproxy/auth/model_name_mappings.go
+++ b/sdk/cliproxy/auth/model_name_mappings.go
@@ -81,7 +81,9 @@ func (m *Manager) applyOAuthModelMapping(auth *Auth, requestedModel string, meta
 			out[k] = v
 		}
 	}
-	out[util.ModelMappingOriginalModelMetadataKey] = upstreamModel
+	// Store the requested alias (e.g., "gp") so downstream can use it to look up
 	// model metadata from the global registry where it was registered under this alias.
 	out[util.ModelMappingOriginalModelMetadataKey] = requestedModel
 	return upstreamModel, out
 }
--- a/sdk/cliproxy/model_registry.go
+++ b/sdk/cliproxy/model_registry.go
@@ -13,6 +13,7 @@ type ModelRegistry interface {
 	ClearModelQuotaExceeded(clientID, modelID string)
 	ClientSupportsModel(clientID, modelID string) bool
 	GetAvailableModels(handlerType string) []map[string]any
 	GetAvailableModelsByProvider(provider string) []*ModelInfo
 }
 // GlobalModelRegistry returns the shared registry instance.
--- a/test/model_alias_thinking_suffix_test.go
+++ b/test/model_alias_thinking_suffix_test.go
@@ -0,0 +1,211 @@
 package test
 import (
 	"testing"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/runtime/executor"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/tidwall/gjson"
 )
 // TestModelAliasThinkingSuffix walks the 32 scenarios listed in docs/thinking_suffix_test_cases.md.
 // Each scenario takes a model name carrying a thinking suffix (either the original model name or
 // an alias of it), parses the suffix, and checks the thinking settings that come out of it; for
 // Gemini models the request-body rewrite is exercised as well.
 func TestModelAliasThinkingSuffix(t *testing.T) {
 	cases := []struct {
 		id            int
 		name          string
 		provider      string
 		requestModel  string
 		suffixType    string
 		expectedField string // "thinkingBudget", "thinkingLevel", "budget_tokens", "reasoning_effort", "enable_thinking"
 		expectedValue any
 		upstreamModel string // The upstream model after alias resolution
 		isAlias       bool
 	}{
 		// === 1. Antigravity Provider ===
 		// 1.1 Budget-only models (Gemini 2.5)
 		{1, "antigravity_original_numeric", "antigravity", "gemini-2.5-computer-use-preview-10-2025(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", false},
 		{2, "antigravity_alias_numeric", "antigravity", "gp(1000)", "numeric", "thinkingBudget", 1000, "gemini-2.5-computer-use-preview-10-2025", true},
 		// 1.2 Budget+Levels models (Gemini 3)
 		{3, "antigravity_original_numeric_to_level", "antigravity", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{4, "antigravity_original_level", "antigravity", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{5, "antigravity_alias_numeric_to_level", "antigravity", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		{6, "antigravity_alias_level", "antigravity", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		// === 2. Gemini CLI Provider ===
 		// 2.1 Budget-only models
 		{7, "gemini_cli_original_numeric", "gemini-cli", "gemini-2.5-pro(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", false},
 		{8, "gemini_cli_alias_numeric", "gemini-cli", "g25p(8192)", "numeric", "thinkingBudget", 8192, "gemini-2.5-pro", true},
 		// 2.2 Budget+Levels models
 		{9, "gemini_cli_original_numeric_to_level", "gemini-cli", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{10, "gemini_cli_original_level", "gemini-cli", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{11, "gemini_cli_alias_numeric_to_level", "gemini-cli", "gf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		{12, "gemini_cli_alias_level", "gemini-cli", "gf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		// === 3. Vertex Provider ===
 		// 3.1 Budget-only models
 		{13, "vertex_original_numeric", "vertex", "gemini-2.5-pro(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", false},
 		{14, "vertex_alias_numeric", "vertex", "vg25p(16384)", "numeric", "thinkingBudget", 16384, "gemini-2.5-pro", true},
 		// 3.2 Budget+Levels models
 		{15, "vertex_original_numeric_to_level", "vertex", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{16, "vertex_original_level", "vertex", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{17, "vertex_alias_numeric_to_level", "vertex", "vgf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		{18, "vertex_alias_level", "vertex", "vgf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		// === 4. AI Studio Provider ===
 		// 4.1 Budget-only models
 		{19, "aistudio_original_numeric", "aistudio", "gemini-2.5-pro(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", false},
 		{20, "aistudio_alias_numeric", "aistudio", "ag25p(12000)", "numeric", "thinkingBudget", 12000, "gemini-2.5-pro", true},
 		// 4.2 Budget+Levels models
 		{21, "aistudio_original_numeric_to_level", "aistudio", "gemini-3-flash-preview(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{22, "aistudio_original_level", "aistudio", "gemini-3-flash-preview(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", false},
 		{23, "aistudio_alias_numeric_to_level", "aistudio", "agf(1000)", "numeric", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		{24, "aistudio_alias_level", "aistudio", "agf(low)", "level", "thinkingLevel", "low", "gemini-3-flash-preview", true},
 		// === 5. Claude Provider ===
 		{25, "claude_original_numeric", "claude", "claude-sonnet-4-5-20250929(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", false},
 		{26, "claude_alias_numeric", "claude", "cs45(16384)", "numeric", "budget_tokens", 16384, "claude-sonnet-4-5-20250929", true},
 		// === 6. Codex Provider ===
 		{27, "codex_original_level", "codex", "gpt-5(high)", "level", "reasoning_effort", "high", "gpt-5", false},
 		{28, "codex_alias_level", "codex", "g5(high)", "level", "reasoning_effort", "high", "gpt-5", true},
 		// === 7. Qwen Provider ===
 		{29, "qwen_original_level", "qwen", "qwen3-coder-plus(high)", "level", "enable_thinking", true, "qwen3-coder-plus", false},
 		{30, "qwen_alias_level", "qwen", "qcp(high)", "level", "enable_thinking", true, "qwen3-coder-plus", true},
 		// === 8. iFlow Provider ===
 		{31, "iflow_original_level", "iflow", "glm-4.7(high)", "level", "reasoning_effort", "high", "glm-4.7", false},
 		{32, "iflow_alias_level", "iflow", "glm(high)", "level", "reasoning_effort", "high", "glm-4.7", true},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			// Step 1: split the thinking suffix off the requested name, as the SDK layer does.
 			// Example: "gp(1000)" yields alias "gp" plus metadata carrying the budget.
 			alias, parsed := util.NormalizeThinkingModel(tc.requestModel)
 			suffixExpected := tc.suffixType == "numeric" || tc.suffixType == "level"
 			if suffixExpected && parsed == nil {
 				t.Fatalf("Case #%d: NormalizeThinkingModel(%q) metadata is nil", tc.id, tc.requestModel)
 			}
 			// Step 2: mimic the OAuth model mapping, which records the requested alias
 			// (not the upstream model) under the original-model metadata key.
 			if tc.isAlias {
 				if parsed == nil {
 					parsed = make(map[string]any)
 				}
 				parsed[util.ModelMappingOriginalModelMetadataKey] = alias
 			}
 			// requestMetadata assembles a fresh metadata map for the body-rewrite steps:
 			// the alias entry first (alias cases only), then every parsed suffix entry on top.
 			requestMetadata := func() map[string]any {
 				md := make(map[string]any)
 				if tc.isAlias {
 					md[util.ModelMappingOriginalModelMetadataKey] = alias
 				}
 				for key, val := range parsed {
 					md[key] = val
 				}
 				return md
 			}
 			// Step 3: confirm the parsed metadata exposes the expected thinking setting.
 			switch tc.suffixType {
 			case "numeric":
 				budget, _, _, matched := util.ThinkingFromMetadata(parsed)
 				if !matched {
 					t.Fatalf("Case #%d: ThinkingFromMetadata did not match", tc.id)
 				}
 				if budget == nil {
 					t.Fatalf("Case #%d: expected budget in metadata", tc.id)
 				}
 				switch tc.expectedField {
 				case "thinkingBudget", "budget_tokens":
 					// Budget-style fields must carry the number from the suffix as-is.
 					if want := tc.expectedValue.(int); *budget != want {
 						t.Errorf("Case #%d: budget = %d, want %d", tc.id, *budget, want)
 					}
 				case "thinkingLevel":
 					// Gemini 3 models take a level, so the numeric budget has to convert to one.
 					got, ok := util.ThinkingBudgetToGemini3Level(tc.upstreamModel, *budget)
 					if !ok {
 						t.Fatalf("Case #%d: ThinkingBudgetToGemini3Level failed", tc.id)
 					}
 					if want := tc.expectedValue.(string); got != want {
 						t.Errorf("Case #%d: converted level = %q, want %q", tc.id, got, want)
 					}
 				}
 			case "level":
 				_, _, effort, matched := util.ThinkingFromMetadata(parsed)
 				if !matched {
 					t.Fatalf("Case #%d: ThinkingFromMetadata did not match", tc.id)
 				}
 				if effort == nil {
 					t.Fatalf("Case #%d: expected effort in metadata", tc.id)
 				}
 				switch tc.expectedField {
 				case "thinkingLevel", "reasoning_effort":
 					if want := tc.expectedValue.(string); *effort != want {
 						t.Errorf("Case #%d: effort = %q, want %q", tc.id, *effort, want)
 					}
 				}
 			}
 			// Step 4: for Gemini 3 models, run the thinkingLevel rewrite against a minimal
 			// request body, passing the upstream model alongside the alias-bearing metadata.
 			if tc.expectedField == "thinkingLevel" && util.IsGemini3Model(tc.upstreamModel) {
 				payload := []byte(`{"request":{"contents":[]}}`)
 				rewritten := util.ApplyGemini3ThinkingLevelFromMetadataCLI(tc.upstreamModel, requestMetadata(), payload)
 				got := gjson.GetBytes(rewritten, "request.generationConfig.thinkingConfig.thinkingLevel")
 				want := tc.expectedValue.(string)
 				switch {
 				case !got.Exists():
 					t.Errorf("Case #%d: expected thinkingLevel in result", tc.id)
 				case got.String() != want:
 					t.Errorf("Case #%d: thinkingLevel = %q, want %q", tc.id, got.String(), want)
 				}
 			}
 			// Step 5: for Gemini 2.5 models, run the exported ApplyThinkingMetadataCLI against
 			// a minimal request body, again with the upstream model and alias-bearing metadata.
 			if tc.expectedField == "thinkingBudget" && util.IsGemini25Model(tc.upstreamModel) {
 				payload := []byte(`{"request":{"contents":[]}}`)
 				rewritten := executor.ApplyThinkingMetadataCLI(payload, requestMetadata(), tc.upstreamModel)
 				got := gjson.GetBytes(rewritten, "request.generationConfig.thinkingConfig.thinkingBudget")
 				want := tc.expectedValue.(int)
 				switch {
 				case !got.Exists():
 					t.Errorf("Case #%d: expected thinkingBudget in result", tc.id)
 				case int(got.Int()) != want:
 					t.Errorf("Case #%d: thinkingBudget = %d, want %d", tc.id, int(got.Int()), want)
 				}
 			}
 		})
 	}
 }
Author	SHA1	Message	Date
Luis Pater	8d15723195	feat(registry): add `GetAvailableModelsByProvider` method for retrieving models by provider Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details	2025-12-31 23:37:46 +08:00
Chén Mù	736e0aae86	Merge pull request #814 from router-for-me/aistudio Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details Fix model alias thinking suffix	2025-12-31 03:08:05 -08:00
hkfires	8bf3305b2b	fix(thinking): fallback to upstream model for thinking support when alias not in registry	2025-12-31 18:07:13 +08:00
hkfires	d00e3ea973	feat(thinking): add numeric budget to thinkingLevel conversion fallback	2025-12-31 17:14:47 +08:00
hkfires	89db4e9481	fix(thinking): use model alias for thinking config resolution in mapped models	2025-12-31 17:09:22 +08:00
hkfires	e332419081	feat(registry): add thinking support for gemini-2.5-computer-use-preview model	2025-12-31 17:09:22 +08:00
Luis Pater	e998b1229a	feat(updater): add fallback URL and logic for missing management asset Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details	2025-12-31 11:51:20 +08:00
Luis Pater	bbed134bd1	feat(api): add `GetAuthStatus` method to `ManagementTokenRequester` interface Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details	2025-12-31 09:40:48 +08:00