Merge pull request #623 from router-for-me/remote-OAuth

Remote OAuth
Merge pull request #618 from router-for-me/amp
2025-12-19 18:29:09 +08:00 · 2025-12-19 17:37:51 +08:00 · 2025-12-19 17:36:52 +08:00 · 2025-12-19 13:57:47 +08:00 · 2025-12-19 13:11:15 +08:00 · 2025-12-19 12:38:28 +08:00
17 changed files with 1605 additions and 131 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -27,5 +27,8 @@ config.yaml
 bin/*
 .claude/*
 .vscode/*
+.gemini/*
 .serena/*
-.bmad/*
+.agent/*
+.bmad/*
+_bmad/*
--- a/.github/workflows/pr-test-build.yml
+++ b/.github/workflows/pr-test-build.yml
@@ -0,0 +1,23 @@
+# Pull-request smoke test: checks that ./cmd/server still compiles.
+name: pr-test-build
+
+on:
+  pull_request:
+
+# Only read access to repository contents is requested.
+permissions:
+  contents: read
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Set up Go
+        uses: actions/setup-go@v5
+        with:
+          # The Go version is taken from go.mod.
+          go-version-file: go.mod
+          cache: true
+      # The binary is built and then removed again.
+      - name: Build
+        run: |
+          go build -o test-output ./cmd/server
+          rm -f test-output
--- a/.gitignore
+++ b/.gitignore
@@ -30,8 +30,11 @@ GEMINI.md
 # Tooling metadata
 .vscode/*
 .claude/*
+.gemini/*
 .serena/*
+.agent/*
 .bmad/*
+_bmad/*

 # macOS
 .DS_Store
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -36,10 +36,6 @@ import (
 	"golang.org/x/oauth2/google"
 )

-var (
-	oauthStatus = make(map[string]string)
-)
-
 var lastRefreshKeys = []string{"last_refresh", "lastRefresh", "last_refreshed_at", "lastRefreshedAt"}

 const (
@@ -786,6 +782,8 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		return
 	}

+	RegisterOAuthSession(state, "anthropic")
+
 	isWebUI := isWebUIRequest(c)
 	if isWebUI {
 		targetURL, errTarget := h.managementCallbackURL("/anthropic/callback")
@@ -812,7 +810,7 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 			deadline := time.Now().Add(timeout)
 			for {
 				if time.Now().After(deadline) {
-					oauthStatus[state] = "Timeout waiting for OAuth callback"
+					SetOAuthSessionError(state, "Timeout waiting for OAuth callback")
 					return nil, fmt.Errorf("timeout waiting for OAuth callback")
 				}
 				data, errRead := os.ReadFile(path)
@@ -837,13 +835,13 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		if errStr := resultMap["error"]; errStr != "" {
 			oauthErr := claude.NewOAuthError(errStr, "", http.StatusBadRequest)
 			log.Error(claude.GetUserFriendlyMessage(oauthErr))
-			oauthStatus[state] = "Bad request"
+			SetOAuthSessionError(state, "Bad request")
 			return
 		}
 		if resultMap["state"] != state {
 			authErr := claude.NewAuthenticationError(claude.ErrInvalidState, fmt.Errorf("expected %s, got %s", state, resultMap["state"]))
 			log.Error(claude.GetUserFriendlyMessage(authErr))
-			oauthStatus[state] = "State code error"
+			SetOAuthSessionError(state, "State code error")
 			return
 		}

@@ -876,7 +874,7 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		if errDo != nil {
 			authErr := claude.NewAuthenticationError(claude.ErrCodeExchangeFailed, errDo)
 			log.Errorf("Failed to exchange authorization code for tokens: %v", authErr)
-			oauthStatus[state] = "Failed to exchange authorization code for tokens"
+			SetOAuthSessionError(state, "Failed to exchange authorization code for tokens")
 			return
 		}
 		defer func() {
@@ -887,7 +885,7 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		respBody, _ := io.ReadAll(resp.Body)
 		if resp.StatusCode != http.StatusOK {
 			log.Errorf("token exchange failed with status %d: %s", resp.StatusCode, string(respBody))
-			oauthStatus[state] = fmt.Sprintf("token exchange failed with status %d", resp.StatusCode)
+			SetOAuthSessionError(state, fmt.Sprintf("token exchange failed with status %d", resp.StatusCode))
 			return
 		}
 		var tResp struct {
@@ -900,7 +898,7 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		}
 		if errU := json.Unmarshal(respBody, &tResp); errU != nil {
 			log.Errorf("failed to parse token response: %v", errU)
-			oauthStatus[state] = "Failed to parse token response"
+			SetOAuthSessionError(state, "Failed to parse token response")
 			return
 		}
 		bundle := &claude.ClaudeAuthBundle{
@@ -925,7 +923,7 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
 			log.Errorf("Failed to save authentication tokens: %v", errSave)
-			oauthStatus[state] = "Failed to save authentication tokens"
+			SetOAuthSessionError(state, "Failed to save authentication tokens")
 			return
 		}

@@ -934,10 +932,9 @@ func (h *Handler) RequestAnthropicToken(c *gin.Context) {
 			fmt.Println("API key obtained and saved")
 		}
 		fmt.Println("You can now use Claude services through this CLI")
-		delete(oauthStatus, state)
+		CompleteOAuthSession(state)
 	}()

-	oauthStatus[state] = ""
 	c.JSON(200, gin.H{"status": "ok", "url": authURL, "state": state})
 }

@@ -968,6 +965,8 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 	state := fmt.Sprintf("gem-%d", time.Now().UnixNano())
 	authURL := conf.AuthCodeURL(state, oauth2.AccessTypeOffline, oauth2.SetAuthURLParam("prompt", "consent"))

+	RegisterOAuthSession(state, "gemini")
+
 	isWebUI := isWebUIRequest(c)
 	if isWebUI {
 		targetURL, errTarget := h.managementCallbackURL("/google/callback")
@@ -996,7 +995,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		for {
 			if time.Now().After(deadline) {
 				log.Error("oauth flow timed out")
-				oauthStatus[state] = "OAuth flow timed out"
+				SetOAuthSessionError(state, "OAuth flow timed out")
 				return
 			}
 			if data, errR := os.ReadFile(waitFile); errR == nil {
@@ -1005,13 +1004,13 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 				_ = os.Remove(waitFile)
 				if errStr := m["error"]; errStr != "" {
 					log.Errorf("Authentication failed: %s", errStr)
-					oauthStatus[state] = "Authentication failed"
+					SetOAuthSessionError(state, "Authentication failed")
 					return
 				}
 				authCode = m["code"]
 				if authCode == "" {
 					log.Errorf("Authentication failed: code not found")
-					oauthStatus[state] = "Authentication failed: code not found"
+					SetOAuthSessionError(state, "Authentication failed: code not found")
 					return
 				}
 				break
@@ -1023,7 +1022,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		token, err := conf.Exchange(ctx, authCode)
 		if err != nil {
 			log.Errorf("Failed to exchange token: %v", err)
-			oauthStatus[state] = "Failed to exchange token"
+			SetOAuthSessionError(state, "Failed to exchange token")
 			return
 		}

@@ -1034,7 +1033,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		req, errNewRequest := http.NewRequestWithContext(ctx, "GET", "https://www.googleapis.com/oauth2/v1/userinfo?alt=json", nil)
 		if errNewRequest != nil {
 			log.Errorf("Could not get user info: %v", errNewRequest)
-			oauthStatus[state] = "Could not get user info"
+			SetOAuthSessionError(state, "Could not get user info")
 			return
 		}
 		req.Header.Set("Content-Type", "application/json")
@@ -1043,7 +1042,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		resp, errDo := authHTTPClient.Do(req)
 		if errDo != nil {
 			log.Errorf("Failed to execute request: %v", errDo)
-			oauthStatus[state] = "Failed to execute request"
+			SetOAuthSessionError(state, "Failed to execute request")
 			return
 		}
 		defer func() {
@@ -1055,7 +1054,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		bodyBytes, _ := io.ReadAll(resp.Body)
 		if resp.StatusCode < 200 || resp.StatusCode >= 300 {
 			log.Errorf("Get user info request failed with status %d: %s", resp.StatusCode, string(bodyBytes))
-			oauthStatus[state] = fmt.Sprintf("Get user info request failed with status %d", resp.StatusCode)
+			SetOAuthSessionError(state, fmt.Sprintf("Get user info request failed with status %d", resp.StatusCode))
 			return
 		}

@@ -1064,7 +1063,6 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 			fmt.Printf("Authenticated user email: %s\n", email)
 		} else {
 			fmt.Println("Failed to get user email from token")
-			oauthStatus[state] = "Failed to get user email from token"
 		}

 		// Marshal/unmarshal oauth2.Token to generic map and enrich fields
@@ -1072,7 +1070,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		jsonData, _ := json.Marshal(token)
 		if errUnmarshal := json.Unmarshal(jsonData, &ifToken); errUnmarshal != nil {
 			log.Errorf("Failed to unmarshal token: %v", errUnmarshal)
-			oauthStatus[state] = "Failed to unmarshal token"
+			SetOAuthSessionError(state, "Failed to unmarshal token")
 			return
 		}

@@ -1098,7 +1096,7 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		gemClient, errGetClient := gemAuth.GetAuthenticatedClient(ctx, &ts, h.cfg, true)
 		if errGetClient != nil {
 			log.Errorf("failed to get authenticated client: %v", errGetClient)
-			oauthStatus[state] = "Failed to get authenticated client"
+			SetOAuthSessionError(state, "Failed to get authenticated client")
 			return
 		}
 		fmt.Println("Authentication successful.")
@@ -1108,12 +1106,12 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 			projects, errAll := onboardAllGeminiProjects(ctx, gemClient, &ts)
 			if errAll != nil {
 				log.Errorf("Failed to complete Gemini CLI onboarding: %v", errAll)
-				oauthStatus[state] = "Failed to complete Gemini CLI onboarding"
+				SetOAuthSessionError(state, "Failed to complete Gemini CLI onboarding")
 				return
 			}
 			if errVerify := ensureGeminiProjectsEnabled(ctx, gemClient, projects); errVerify != nil {
 				log.Errorf("Failed to verify Cloud AI API status: %v", errVerify)
-				oauthStatus[state] = "Failed to verify Cloud AI API status"
+				SetOAuthSessionError(state, "Failed to verify Cloud AI API status")
 				return
 			}
 			ts.ProjectID = strings.Join(projects, ",")
@@ -1121,26 +1119,26 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		} else {
 			if errEnsure := ensureGeminiProjectAndOnboard(ctx, gemClient, &ts, requestedProjectID); errEnsure != nil {
 				log.Errorf("Failed to complete Gemini CLI onboarding: %v", errEnsure)
-				oauthStatus[state] = "Failed to complete Gemini CLI onboarding"
+				SetOAuthSessionError(state, "Failed to complete Gemini CLI onboarding")
 				return
 			}

 			if strings.TrimSpace(ts.ProjectID) == "" {
 				log.Error("Onboarding did not return a project ID")
-				oauthStatus[state] = "Failed to resolve project ID"
+				SetOAuthSessionError(state, "Failed to resolve project ID")
 				return
 			}

 			isChecked, errCheck := checkCloudAPIIsEnabled(ctx, gemClient, ts.ProjectID)
 			if errCheck != nil {
 				log.Errorf("Failed to verify Cloud AI API status: %v", errCheck)
-				oauthStatus[state] = "Failed to verify Cloud AI API status"
+				SetOAuthSessionError(state, "Failed to verify Cloud AI API status")
 				return
 			}
 			ts.Checked = isChecked
 			if !isChecked {
 				log.Error("Cloud AI API is not enabled for the selected project")
-				oauthStatus[state] = "Cloud AI API not enabled"
+				SetOAuthSessionError(state, "Cloud AI API not enabled")
 				return
 			}
 		}
@@ -1163,15 +1161,14 @@ func (h *Handler) RequestGeminiCLIToken(c *gin.Context) {
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
 			log.Errorf("Failed to save token to file: %v", errSave)
-			oauthStatus[state] = "Failed to save token to file"
+			SetOAuthSessionError(state, "Failed to save token to file")
 			return
 		}

-		delete(oauthStatus, state)
+		CompleteOAuthSession(state)
 		fmt.Printf("You can now use Gemini CLI services through this CLI; token saved to %s\n", savedPath)
 	}()

-	oauthStatus[state] = ""
 	c.JSON(200, gin.H{"status": "ok", "url": authURL, "state": state})
 }

@@ -1207,6 +1204,8 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 		return
 	}

+	RegisterOAuthSession(state, "codex")
+
 	isWebUI := isWebUIRequest(c)
 	if isWebUI {
 		targetURL, errTarget := h.managementCallbackURL("/codex/callback")
@@ -1235,7 +1234,7 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 			if time.Now().After(deadline) {
 				authErr := codex.NewAuthenticationError(codex.ErrCallbackTimeout, fmt.Errorf("timeout waiting for OAuth callback"))
 				log.Error(codex.GetUserFriendlyMessage(authErr))
-				oauthStatus[state] = "Timeout waiting for OAuth callback"
+				SetOAuthSessionError(state, "Timeout waiting for OAuth callback")
 				return
 			}
 			if data, errR := os.ReadFile(waitFile); errR == nil {
@@ -1245,12 +1244,12 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 				if errStr := m["error"]; errStr != "" {
 					oauthErr := codex.NewOAuthError(errStr, "", http.StatusBadRequest)
 					log.Error(codex.GetUserFriendlyMessage(oauthErr))
-					oauthStatus[state] = "Bad Request"
+					SetOAuthSessionError(state, "Bad Request")
 					return
 				}
 				if m["state"] != state {
 					authErr := codex.NewAuthenticationError(codex.ErrInvalidState, fmt.Errorf("expected %s, got %s", state, m["state"]))
-					oauthStatus[state] = "State code error"
+					SetOAuthSessionError(state, "State code error")
 					log.Error(codex.GetUserFriendlyMessage(authErr))
 					return
 				}
@@ -1281,14 +1280,14 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 		resp, errDo := httpClient.Do(req)
 		if errDo != nil {
 			authErr := codex.NewAuthenticationError(codex.ErrCodeExchangeFailed, errDo)
-			oauthStatus[state] = "Failed to exchange authorization code for tokens"
+			SetOAuthSessionError(state, "Failed to exchange authorization code for tokens")
 			log.Errorf("Failed to exchange authorization code for tokens: %v", authErr)
 			return
 		}
 		defer func() { _ = resp.Body.Close() }()
 		respBody, _ := io.ReadAll(resp.Body)
 		if resp.StatusCode != http.StatusOK {
-			oauthStatus[state] = fmt.Sprintf("Token exchange failed with status %d", resp.StatusCode)
+			SetOAuthSessionError(state, fmt.Sprintf("Token exchange failed with status %d", resp.StatusCode))
 			log.Errorf("token exchange failed with status %d: %s", resp.StatusCode, string(respBody))
 			return
 		}
@@ -1299,7 +1298,7 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 			ExpiresIn    int    `json:"expires_in"`
 		}
 		if errU := json.Unmarshal(respBody, &tokenResp); errU != nil {
-			oauthStatus[state] = "Failed to parse token response"
+			SetOAuthSessionError(state, "Failed to parse token response")
 			log.Errorf("failed to parse token response: %v", errU)
 			return
 		}
@@ -1337,7 +1336,7 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 		}
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
-			oauthStatus[state] = "Failed to save authentication tokens"
+			SetOAuthSessionError(state, "Failed to save authentication tokens")
 			log.Errorf("Failed to save authentication tokens: %v", errSave)
 			return
 		}
@@ -1346,10 +1345,9 @@ func (h *Handler) RequestCodexToken(c *gin.Context) {
 			fmt.Println("API key obtained and saved")
 		}
 		fmt.Println("You can now use Codex services through this CLI")
-		delete(oauthStatus, state)
+		CompleteOAuthSession(state)
 	}()

-	oauthStatus[state] = ""
 	c.JSON(200, gin.H{"status": "ok", "url": authURL, "state": state})
 }

@@ -1390,6 +1388,8 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 	params.Set("state", state)
 	authURL := "https://accounts.google.com/o/oauth2/v2/auth?" + params.Encode()

+	RegisterOAuthSession(state, "antigravity")
+
 	isWebUI := isWebUIRequest(c)
 	if isWebUI {
 		targetURL, errTarget := h.managementCallbackURL("/antigravity/callback")
@@ -1416,7 +1416,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		for {
 			if time.Now().After(deadline) {
 				log.Error("oauth flow timed out")
-				oauthStatus[state] = "OAuth flow timed out"
+				SetOAuthSessionError(state, "OAuth flow timed out")
 				return
 			}
 			if data, errReadFile := os.ReadFile(waitFile); errReadFile == nil {
@@ -1425,18 +1425,18 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 				_ = os.Remove(waitFile)
 				if errStr := strings.TrimSpace(payload["error"]); errStr != "" {
 					log.Errorf("Authentication failed: %s", errStr)
-					oauthStatus[state] = "Authentication failed"
+					SetOAuthSessionError(state, "Authentication failed")
 					return
 				}
 				if payloadState := strings.TrimSpace(payload["state"]); payloadState != "" && payloadState != state {
 					log.Errorf("Authentication failed: state mismatch")
-					oauthStatus[state] = "Authentication failed: state mismatch"
+					SetOAuthSessionError(state, "Authentication failed: state mismatch")
 					return
 				}
 				authCode = strings.TrimSpace(payload["code"])
 				if authCode == "" {
 					log.Error("Authentication failed: code not found")
-					oauthStatus[state] = "Authentication failed: code not found"
+					SetOAuthSessionError(state, "Authentication failed: code not found")
 					return
 				}
 				break
@@ -1455,7 +1455,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		req, errNewRequest := http.NewRequestWithContext(ctx, http.MethodPost, "https://oauth2.googleapis.com/token", strings.NewReader(form.Encode()))
 		if errNewRequest != nil {
 			log.Errorf("Failed to build token request: %v", errNewRequest)
-			oauthStatus[state] = "Failed to build token request"
+			SetOAuthSessionError(state, "Failed to build token request")
 			return
 		}
 		req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
@@ -1463,7 +1463,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		resp, errDo := httpClient.Do(req)
 		if errDo != nil {
 			log.Errorf("Failed to execute token request: %v", errDo)
-			oauthStatus[state] = "Failed to exchange token"
+			SetOAuthSessionError(state, "Failed to exchange token")
 			return
 		}
 		defer func() {
@@ -1475,7 +1475,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
 			bodyBytes, _ := io.ReadAll(resp.Body)
 			log.Errorf("Antigravity token exchange failed with status %d: %s", resp.StatusCode, string(bodyBytes))
-			oauthStatus[state] = fmt.Sprintf("Token exchange failed: %d", resp.StatusCode)
+			SetOAuthSessionError(state, fmt.Sprintf("Token exchange failed: %d", resp.StatusCode))
 			return
 		}

@@ -1487,7 +1487,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		}
 		if errDecode := json.NewDecoder(resp.Body).Decode(&tokenResp); errDecode != nil {
 			log.Errorf("Failed to parse token response: %v", errDecode)
-			oauthStatus[state] = "Failed to parse token response"
+			SetOAuthSessionError(state, "Failed to parse token response")
 			return
 		}

@@ -1496,7 +1496,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			infoReq, errInfoReq := http.NewRequestWithContext(ctx, http.MethodGet, "https://www.googleapis.com/oauth2/v1/userinfo?alt=json", nil)
 			if errInfoReq != nil {
 				log.Errorf("Failed to build user info request: %v", errInfoReq)
-				oauthStatus[state] = "Failed to build user info request"
+				SetOAuthSessionError(state, "Failed to build user info request")
 				return
 			}
 			infoReq.Header.Set("Authorization", "Bearer "+tokenResp.AccessToken)
@@ -1504,7 +1504,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			infoResp, errInfo := httpClient.Do(infoReq)
 			if errInfo != nil {
 				log.Errorf("Failed to execute user info request: %v", errInfo)
-				oauthStatus[state] = "Failed to execute user info request"
+				SetOAuthSessionError(state, "Failed to execute user info request")
 				return
 			}
 			defer func() {
@@ -1523,7 +1523,7 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 			} else {
 				bodyBytes, _ := io.ReadAll(infoResp.Body)
 				log.Errorf("User info request failed with status %d: %s", infoResp.StatusCode, string(bodyBytes))
-				oauthStatus[state] = fmt.Sprintf("User info request failed: %d", infoResp.StatusCode)
+				SetOAuthSessionError(state, fmt.Sprintf("User info request failed: %d", infoResp.StatusCode))
 				return
 			}
 		}
@@ -1571,11 +1571,11 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
 			log.Errorf("Failed to save token to file: %v", errSave)
-			oauthStatus[state] = "Failed to save token to file"
+			SetOAuthSessionError(state, "Failed to save token to file")
 			return
 		}

-		delete(oauthStatus, state)
+		CompleteOAuthSession(state)
 		fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
 		if projectID != "" {
 			fmt.Printf("Using GCP project: %s\n", projectID)
@@ -1583,7 +1583,6 @@ func (h *Handler) RequestAntigravityToken(c *gin.Context) {
 		fmt.Println("You can now use Antigravity services through this CLI")
 	}()

-	oauthStatus[state] = ""
 	c.JSON(200, gin.H{"status": "ok", "url": authURL, "state": state})
 }

@@ -1605,11 +1604,13 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 	}
 	authURL := deviceFlow.VerificationURIComplete

+	RegisterOAuthSession(state, "qwen")
+
 	go func() {
 		fmt.Println("Waiting for authentication...")
 		tokenData, errPollForToken := qwenAuth.PollForToken(deviceFlow.DeviceCode, deviceFlow.CodeVerifier)
 		if errPollForToken != nil {
-			oauthStatus[state] = "Authentication failed"
+			SetOAuthSessionError(state, "Authentication failed")
 			fmt.Printf("Authentication failed: %v\n", errPollForToken)
 			return
 		}
@@ -1628,16 +1629,15 @@ func (h *Handler) RequestQwenToken(c *gin.Context) {
 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
 			log.Errorf("Failed to save authentication tokens: %v", errSave)
-			oauthStatus[state] = "Failed to save authentication tokens"
+			SetOAuthSessionError(state, "Failed to save authentication tokens")
 			return
 		}

 		fmt.Printf("Authentication successful! Token saved to %s\n", savedPath)
 		fmt.Println("You can now use Qwen services through this CLI")
-		delete(oauthStatus, state)
+		CompleteOAuthSession(state)
 	}()

-	oauthStatus[state] = ""
 	c.JSON(200, gin.H{"status": "ok", "url": authURL, "state": state})
 }

@@ -1650,6 +1650,8 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 	authSvc := iflowauth.NewIFlowAuth(h.cfg)
 	authURL, redirectURI := authSvc.AuthorizationURL(state, iflowauth.CallbackPort)

+	RegisterOAuthSession(state, "iflow")
+
 	isWebUI := isWebUIRequest(c)
 	if isWebUI {
 		targetURL, errTarget := h.managementCallbackURL("/iflow/callback")
@@ -1676,7 +1678,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 		var resultMap map[string]string
 		for {
 			if time.Now().After(deadline) {
-				oauthStatus[state] = "Authentication failed"
+				SetOAuthSessionError(state, "Authentication failed")
 				fmt.Println("Authentication failed: timeout waiting for callback")
 				return
 			}
@@ -1689,26 +1691,26 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 		}

 		if errStr := strings.TrimSpace(resultMap["error"]); errStr != "" {
-			oauthStatus[state] = "Authentication failed"
+			SetOAuthSessionError(state, "Authentication failed")
 			fmt.Printf("Authentication failed: %s\n", errStr)
 			return
 		}
 		if resultState := strings.TrimSpace(resultMap["state"]); resultState != state {
-			oauthStatus[state] = "Authentication failed"
+			SetOAuthSessionError(state, "Authentication failed")
 			fmt.Println("Authentication failed: state mismatch")
 			return
 		}

 		code := strings.TrimSpace(resultMap["code"])
 		if code == "" {
-			oauthStatus[state] = "Authentication failed"
+			SetOAuthSessionError(state, "Authentication failed")
 			fmt.Println("Authentication failed: code missing")
 			return
 		}

 		tokenData, errExchange := authSvc.ExchangeCodeForTokens(ctx, code, redirectURI)
 		if errExchange != nil {
-			oauthStatus[state] = "Authentication failed"
+			SetOAuthSessionError(state, "Authentication failed")
 			fmt.Printf("Authentication failed: %v\n", errExchange)
 			return
 		}
@@ -1730,7 +1732,7 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {

 		savedPath, errSave := h.saveTokenRecord(ctx, record)
 		if errSave != nil {
-			oauthStatus[state] = "Failed to save authentication tokens"
+			SetOAuthSessionError(state, "Failed to save authentication tokens")
 			log.Errorf("Failed to save authentication tokens: %v", errSave)
 			return
 		}
@@ -1740,10 +1742,9 @@ func (h *Handler) RequestIFlowToken(c *gin.Context) {
 			fmt.Println("API key obtained and saved")
 		}
 		fmt.Println("You can now use iFlow services through this CLI")
-		delete(oauthStatus, state)
+		CompleteOAuthSession(state)
 	}()

-	oauthStatus[state] = ""
 	c.JSON(http.StatusOK, gin.H{"status": "ok", "url": authURL, "state": state})
 }

@@ -2179,16 +2180,24 @@ func checkCloudAPIIsEnabled(ctx context.Context, httpClient *http.Client, projec
 }

+// GetAuthStatus reports the state of the OAuth session named by the "state"
+// query parameter.
+//
+// Responses:
+//   - 200 {"status":"ok"} when state is empty or no session is stored for it
+//   - 400 {"status":"error"} when state fails ValidateOAuthState
+//   - 200 {"status":"error","error":<message>} when the session holds an error
+//   - 200 {"status":"wait"} when the session exists with an empty status
+//
+// This version does not delete the session after reporting it.
 func (h *Handler) GetAuthStatus(c *gin.Context) {
-	state := c.Query("state")
-	if err, ok := oauthStatus[state]; ok {
-		if err != "" {
-			c.JSON(200, gin.H{"status": "error", "error": err})
-		} else {
-			c.JSON(200, gin.H{"status": "wait"})
-			return
-		}
-	} else {
-		c.JSON(200, gin.H{"status": "ok"})
+	state := strings.TrimSpace(c.Query("state"))
+	if state == "" {
+		c.JSON(http.StatusOK, gin.H{"status": "ok"})
+		return
 	}
-	delete(oauthStatus, state)
+	if err := ValidateOAuthState(state); err != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"status": "error", "error": "invalid state"})
+		return
+	}
+
+	// A missing session is reported the same way as an empty state: "ok".
+	_, status, ok := GetOAuthSession(state)
+	if !ok {
+		c.JSON(http.StatusOK, gin.H{"status": "ok"})
+		return
+	}
+	// A non-empty status is the error message recorded for the session.
+	if status != "" {
+		c.JSON(http.StatusOK, gin.H{"status": "error", "error": status})
+		return
+	}
+	c.JSON(http.StatusOK, gin.H{"status": "wait"})
 }
--- a/internal/api/handlers/management/oauth_callback.go
+++ b/internal/api/handlers/management/oauth_callback.go
@@ -0,0 +1,100 @@
+package management
+
+import (
+	"errors"
+	"net/http"
+	"net/url"
+	"strings"
+
+	"github.com/gin-gonic/gin"
+)
+
+// oauthCallbackRequest is the JSON body decoded by PostOAuthCallback.
+type oauthCallbackRequest struct {
+	// Provider is the provider name; it is normalized by NormalizeOAuthProvider.
+	Provider    string `json:"provider"`
+	// RedirectURL is optional. When present, its query parameters supply
+	// state, code and error for whichever of those fields were left empty.
+	RedirectURL string `json:"redirect_url"`
+	Code        string `json:"code"`
+	State       string `json:"state"`
+	Error       string `json:"error"`
+}
+
+// PostOAuthCallback accepts an OAuth callback result posted as JSON and writes
+// it to the callback file of the matching pending session.
+//
+// state, code and error may be sent as explicit fields or carried in the query
+// string of redirect_url; explicit fields win. The request is rejected when
+// the provider is unsupported, the state is missing or invalid, neither code
+// nor error is present, no session exists for the state, the session is no
+// longer pending, or the session belongs to a different provider.
+func (h *Handler) PostOAuthCallback(c *gin.Context) {
+	// reject sends the error envelope shared by every failure path.
+	reject := func(httpStatus int, reason string) {
+		c.JSON(httpStatus, gin.H{"status": "error", "error": reason})
+	}
+
+	if h == nil || h.cfg == nil {
+		reject(http.StatusInternalServerError, "handler not initialized")
+		return
+	}
+
+	var body oauthCallbackRequest
+	if errBind := c.ShouldBindJSON(&body); errBind != nil {
+		reject(http.StatusBadRequest, "invalid body")
+		return
+	}
+
+	provider, errProvider := NormalizeOAuthProvider(body.Provider)
+	if errProvider != nil {
+		reject(http.StatusBadRequest, "unsupported provider")
+		return
+	}
+
+	state := strings.TrimSpace(body.State)
+	code := strings.TrimSpace(body.Code)
+	errMsg := strings.TrimSpace(body.Error)
+
+	redirect := strings.TrimSpace(body.RedirectURL)
+	if redirect != "" {
+		parsed, errParse := url.Parse(redirect)
+		if errParse != nil {
+			reject(http.StatusBadRequest, "invalid redirect_url")
+			return
+		}
+		query := parsed.Query()
+		fromQuery := func(key string) string {
+			return strings.TrimSpace(query.Get(key))
+		}
+		// Query values only fill fields the body left empty.
+		if state == "" {
+			state = fromQuery("state")
+		}
+		if code == "" {
+			code = fromQuery("code")
+		}
+		if errMsg == "" {
+			errMsg = fromQuery("error")
+		}
+		if errMsg == "" {
+			errMsg = fromQuery("error_description")
+		}
+	}
+
+	switch {
+	case state == "":
+		reject(http.StatusBadRequest, "state is required")
+		return
+	case ValidateOAuthState(state) != nil:
+		reject(http.StatusBadRequest, "invalid state")
+		return
+	case code == "" && errMsg == "":
+		reject(http.StatusBadRequest, "code or error is required")
+		return
+	}
+
+	sessionProvider, sessionStatus, found := GetOAuthSession(state)
+	switch {
+	case !found:
+		reject(http.StatusNotFound, "unknown or expired state")
+		return
+	case sessionStatus != "":
+		reject(http.StatusConflict, "oauth flow is not pending")
+		return
+	case !strings.EqualFold(sessionProvider, provider):
+		reject(http.StatusBadRequest, "provider does not match state")
+		return
+	}
+
+	_, errWrite := WriteOAuthCallbackFileForPendingSession(h.cfg.AuthDir, provider, state, code, errMsg)
+	switch {
+	case errWrite == nil:
+		c.JSON(http.StatusOK, gin.H{"status": "ok"})
+	case errors.Is(errWrite, errOAuthSessionNotPending):
+		reject(http.StatusConflict, "oauth flow is not pending")
+	default:
+		reject(http.StatusInternalServerError, "failed to persist oauth callback")
+	}
+}
--- a/internal/api/handlers/management/oauth_sessions.go
+++ b/internal/api/handlers/management/oauth_sessions.go
@@ -0,0 +1,258 @@
+package management
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+)
+
+const (
+	// oauthSessionTTL is the default lifetime given to a session when it is
+	// registered and again when an error is recorded on it.
+	oauthSessionTTL     = 10 * time.Minute
+	// maxOAuthStateLength is the largest len() accepted by ValidateOAuthState.
+	maxOAuthStateLength = 128
+)
+
+var (
+	// errInvalidOAuthState is wrapped by every ValidateOAuthState failure.
+	errInvalidOAuthState      = errors.New("invalid oauth state")
+	// errUnsupportedOAuthFlow is returned by NormalizeOAuthProvider for names
+	// it does not recognize.
+	errUnsupportedOAuthFlow   = errors.New("unsupported oauth provider")
+	// errOAuthSessionNotPending is returned by
+	// WriteOAuthCallbackFileForPendingSession when the session is not pending.
+	errOAuthSessionNotPending = errors.New("oauth session is not pending")
+)
+
+// oauthSession is one entry of the session store, keyed by OAuth state.
+type oauthSession struct {
+	// Provider is the lower-cased provider name given at registration.
+	Provider  string
+	// Status is empty while the flow is pending; otherwise it holds the
+	// error message recorded for the flow.
+	Status    string
+	// CreatedAt is the registration time.
+	CreatedAt time.Time
+	// ExpiresAt is when the entry becomes eligible for purging; a zero value
+	// is never purged.
+	ExpiresAt time.Time
+}
+
+// oauthSessionStore is a mutex-guarded map of OAuth sessions keyed by state.
+type oauthSessionStore struct {
+	// mu guards sessions. Every method in this file takes the exclusive lock,
+	// because each of them purges expired entries before doing its work.
+	mu       sync.RWMutex
+	// ttl is added to the current time to compute a session's ExpiresAt.
+	ttl      time.Duration
+	sessions map[string]oauthSession
+}
+
+// newOAuthSessionStore returns an empty store whose sessions live for ttl.
+// A zero or negative ttl falls back to oauthSessionTTL.
+func newOAuthSessionStore(ttl time.Duration) *oauthSessionStore {
+	store := &oauthSessionStore{
+		ttl:      oauthSessionTTL,
+		sessions: map[string]oauthSession{},
+	}
+	if ttl > 0 {
+		store.ttl = ttl
+	}
+	return store
+}
+
+// purgeExpiredLocked drops every session whose ExpiresAt is set and lies
+// before now. The caller must already hold s.mu.
+func (s *oauthSessionStore) purgeExpiredLocked(now time.Time) {
+	for key := range s.sessions {
+		deadline := s.sessions[key].ExpiresAt
+		if deadline.IsZero() || !now.After(deadline) {
+			continue
+		}
+		delete(s.sessions, key)
+	}
+}
+
+// Register stores a fresh pending session for state, replacing any existing
+// entry under the same key. state is trimmed and provider is trimmed and
+// lower-cased; the call is a no-op when either ends up empty.
+func (s *oauthSessionStore) Register(state, provider string) {
+	key := strings.TrimSpace(state)
+	owner := strings.ToLower(strings.TrimSpace(provider))
+	if key == "" || owner == "" {
+		return
+	}
+	created := time.Now()
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.purgeExpiredLocked(created)
+	// Status is left at its zero value, which marks the session as pending.
+	s.sessions[key] = oauthSession{
+		Provider:  owner,
+		CreatedAt: created,
+		ExpiresAt: created.Add(s.ttl),
+	}
+}
+
+// SetError records message as the status of the session stored under state
+// and extends its expiry by the store's ttl. An empty message becomes
+// "Authentication failed". Nothing happens when state is empty or no live
+// session exists for it.
+func (s *oauthSessionStore) SetError(state, message string) {
+	key := strings.TrimSpace(state)
+	if key == "" {
+		return
+	}
+	reason := strings.TrimSpace(message)
+	if reason == "" {
+		reason = "Authentication failed"
+	}
+	stamp := time.Now()
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.purgeExpiredLocked(stamp)
+	if current, exists := s.sessions[key]; exists {
+		current.Status = reason
+		current.ExpiresAt = stamp.Add(s.ttl)
+		s.sessions[key] = current
+	}
+}
+
+// Complete removes the session stored under the trimmed state. An empty
+// state is ignored.
+func (s *oauthSessionStore) Complete(state string) {
+	key := strings.TrimSpace(state)
+	if key == "" {
+		return
+	}
+	stamp := time.Now()
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.purgeExpiredLocked(stamp)
+	delete(s.sessions, key)
+}
+
+// Get returns a copy of the session stored under the trimmed state and
+// whether one exists. Expired sessions are purged before the lookup.
+func (s *oauthSessionStore) Get(state string) (oauthSession, bool) {
+	key := strings.TrimSpace(state)
+	stamp := time.Now()
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.purgeExpiredLocked(stamp)
+	if entry, found := s.sessions[key]; found {
+		return entry, true
+	}
+	return oauthSession{}, false
+}
+
+// IsPending reports whether a live session exists for state with an empty
+// status. When provider is non-empty the session must also belong to that
+// provider, compared case-insensitively.
+func (s *oauthSessionStore) IsPending(state, provider string) bool {
+	key := strings.TrimSpace(state)
+	want := strings.ToLower(strings.TrimSpace(provider))
+	stamp := time.Now()
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.purgeExpiredLocked(stamp)
+	entry, found := s.sessions[key]
+	switch {
+	case !found, entry.Status != "":
+		return false
+	case want == "":
+		return true
+	default:
+		return strings.EqualFold(entry.Provider, want)
+	}
+}
+
+// oauthSessions is the package-wide session store behind the exported
+// helper functions.
+var oauthSessions = newOAuthSessionStore(oauthSessionTTL)
+
+// RegisterOAuthSession records a pending session for state in the shared store.
+func RegisterOAuthSession(state, provider string) {
+	oauthSessions.Register(state, provider)
+}
+
+// SetOAuthSessionError stores message as the error status of the session
+// registered under state.
+func SetOAuthSessionError(state, message string) {
+	oauthSessions.SetError(state, message)
+}
+
+// CompleteOAuthSession removes the session registered under state.
+func CompleteOAuthSession(state string) {
+	oauthSessions.Complete(state)
+}
+
+// GetOAuthSession looks up state in the shared store and returns the
+// session's provider and status. ok is false, with empty strings, when no
+// session is stored.
+func GetOAuthSession(state string) (provider string, status string, ok bool) {
+	entry, found := oauthSessions.Get(state)
+	if found {
+		return entry.Provider, entry.Status, true
+	}
+	return "", "", false
+}
+
+// IsOAuthSessionPending reports whether the shared store holds a pending
+// session for state that matches provider.
+func IsOAuthSessionPending(state, provider string) bool {
+	pending := oauthSessions.IsPending(state, provider)
+	return pending
+}
+
+// ValidateOAuthState checks the whitespace-trimmed state value. It must be
+// non-empty, at most maxOAuthStateLength bytes, free of '/' and '\\', free of
+// "..", and made up only of ASCII letters, digits, '-', '_' and '.'.
+// Every failure wraps errInvalidOAuthState.
+func ValidateOAuthState(state string) error {
+	candidate := strings.TrimSpace(state)
+	switch {
+	case candidate == "":
+		return fmt.Errorf("%w: empty", errInvalidOAuthState)
+	case len(candidate) > maxOAuthStateLength:
+		return fmt.Errorf("%w: too long", errInvalidOAuthState)
+	case strings.ContainsAny(candidate, `/\`):
+		return fmt.Errorf("%w: contains path separator", errInvalidOAuthState)
+	case strings.Contains(candidate, ".."):
+		return fmt.Errorf("%w: contains '..'", errInvalidOAuthState)
+	}
+
+	allowed := func(r rune) bool {
+		return (r >= 'a' && r <= 'z') ||
+			(r >= 'A' && r <= 'Z') ||
+			(r >= '0' && r <= '9') ||
+			r == '-' || r == '_' || r == '.'
+	}
+	for _, r := range candidate {
+		if !allowed(r) {
+			return fmt.Errorf("%w: invalid character", errInvalidOAuthState)
+		}
+	}
+	return nil
+}
+
+// NormalizeOAuthProvider maps a provider name or accepted alias, ignoring
+// case and surrounding whitespace, to its canonical name. Unknown names
+// yield errUnsupportedOAuthFlow.
+func NormalizeOAuthProvider(provider string) (string, error) {
+	var canonical string
+	switch key := strings.ToLower(strings.TrimSpace(provider)); key {
+	case "anthropic", "claude":
+		canonical = "anthropic"
+	case "codex", "openai":
+		canonical = "codex"
+	case "gemini", "google":
+		canonical = "gemini"
+	case "iflow", "i-flow":
+		canonical = "iflow"
+	case "antigravity", "anti-gravity":
+		canonical = "antigravity"
+	case "qwen":
+		canonical = "qwen"
+	}
+	if canonical == "" {
+		return "", errUnsupportedOAuthFlow
+	}
+	return canonical, nil
+}
+
+// oauthCallbackFilePayload is the JSON document that WriteOAuthCallbackFile
+// writes to the callback file.
+type oauthCallbackFilePayload struct {
+	Code  string `json:"code"`
+	State string `json:"state"`
+	Error string `json:"error"`
+}
+
+// WriteOAuthCallbackFile writes the OAuth callback result for state as JSON
+// to ".oauth-<provider>-<state>.oauth" inside authDir, with mode 0o600, and
+// returns the path of the written file.
+//
+// provider is canonicalized with NormalizeOAuthProvider and state must pass
+// ValidateOAuthState. state is trimmed before it is validated and before it
+// is used in the file name, so the file name always agrees with the state
+// value stored in the payload.
+//
+// An error is returned when authDir is blank, the provider is unsupported,
+// the state is invalid, or the payload cannot be marshalled or written.
+func WriteOAuthCallbackFile(authDir, provider, state, code, errorMessage string) (string, error) {
+	if strings.TrimSpace(authDir) == "" {
+		return "", fmt.Errorf("auth dir is empty")
+	}
+	canonicalProvider, err := NormalizeOAuthProvider(provider)
+	if err != nil {
+		return "", err
+	}
+	// ValidateOAuthState only inspects the trimmed value, so trim here too;
+	// otherwise surrounding whitespace would leak into the file name.
+	state = strings.TrimSpace(state)
+	if err := ValidateOAuthState(state); err != nil {
+		return "", err
+	}
+
+	fileName := fmt.Sprintf(".oauth-%s-%s.oauth", canonicalProvider, state)
+	filePath := filepath.Join(authDir, fileName)
+	payload := oauthCallbackFilePayload{
+		Code:  strings.TrimSpace(code),
+		State: state,
+		Error: strings.TrimSpace(errorMessage),
+	}
+	data, err := json.Marshal(payload)
+	if err != nil {
+		return "", fmt.Errorf("marshal oauth callback payload: %w", err)
+	}
+	if err := os.WriteFile(filePath, data, 0o600); err != nil {
+		return "", fmt.Errorf("write oauth callback file: %w", err)
+	}
+	return filePath, nil
+}
+
+// WriteOAuthCallbackFileForPendingSession writes the callback file only when
+// the shared store holds a pending session for state and the canonical
+// provider. It returns errOAuthSessionNotPending otherwise, and the
+// normalization error when the provider is unsupported.
+func WriteOAuthCallbackFileForPendingSession(authDir, provider, state, code, errorMessage string) (string, error) {
+	canonicalProvider, errNormalize := NormalizeOAuthProvider(provider)
+	switch {
+	case errNormalize != nil:
+		return "", errNormalize
+	case !IsOAuthSessionPending(state, canonicalProvider):
+		return "", errOAuthSessionNotPending
+	}
+	return WriteOAuthCallbackFile(authDir, canonicalProvider, state, code, errorMessage)
+}
--- a/internal/api/modules/amp/routes.go
+++ b/internal/api/modules/amp/routes.go
@@ -95,6 +95,20 @@ func (m *AmpModule) managementAvailabilityMiddleware() gin.HandlerFunc {
 	}
 }

+// wrapManagementAuth skips auth for selected management paths while keeping authentication elsewhere.
+func wrapManagementAuth(auth gin.HandlerFunc, prefixes ...string) gin.HandlerFunc {
+	return func(c *gin.Context) {
+		path := c.Request.URL.Path
+		for _, prefix := range prefixes {
+			if strings.HasPrefix(path, prefix) && (len(path) == len(prefix) || path[len(prefix)] == '/') {
+				c.Next()
+				return
+			}
+		}
+		auth(c)
+	}
+}
+
 // registerManagementRoutes registers Amp management proxy routes
 // These routes proxy through to the Amp control plane for OAuth, user management, etc.
 // Uses dynamic middleware and proxy getter for hot-reload support.
@@ -109,8 +123,10 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	ampAPI.Use(m.localhostOnlyMiddleware())

 	// Apply authentication middleware - requires valid API key in Authorization header
+	var authWithBypass gin.HandlerFunc
 	if auth != nil {
 		ampAPI.Use(auth)
+		authWithBypass = wrapManagementAuth(auth, "/threads", "/auth")
 	}

 	// Dynamic proxy handler that uses m.getProxy() for hot-reload support
@@ -156,8 +172,8 @@ func (m *AmpModule) registerManagementRoutes(engine *gin.Engine, baseHandler *ha
 	// Root-level routes that AMP CLI expects without /api prefix
 	// These need the same security middleware as the /api/* routes (dynamic for hot-reload)
 	rootMiddleware := []gin.HandlerFunc{m.managementAvailabilityMiddleware(), noCORSMiddleware(), m.localhostOnlyMiddleware()}
-	if auth != nil {
-		rootMiddleware = append(rootMiddleware, auth)
+	if authWithBypass != nil {
+		rootMiddleware = append(rootMiddleware, authWithBypass)
 	}
 	engine.GET("/threads/*path", append(rootMiddleware, proxyHandler)...)
 	engine.GET("/threads.rss", append(rootMiddleware, proxyHandler)...)
--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -354,10 +354,11 @@ func (s *Server) setupRoutes() {
 		code := c.Query("code")
 		state := c.Query("state")
 		errStr := c.Query("error")
-		// Persist to a temporary file keyed by state
+		if errStr == "" {
+			errStr = c.Query("error_description")
+		}
 		if state != "" {
-			file := fmt.Sprintf("%s/.oauth-anthropic-%s.oauth", s.cfg.AuthDir, state)
-			_ = os.WriteFile(file, []byte(fmt.Sprintf(`{"code":"%s","state":"%s","error":"%s"}`, code, state, errStr)), 0o600)
+			_, _ = managementHandlers.WriteOAuthCallbackFileForPendingSession(s.cfg.AuthDir, "anthropic", state, code, errStr)
 		}
 		c.Header("Content-Type", "text/html; charset=utf-8")
 		c.String(http.StatusOK, oauthCallbackSuccessHTML)
@@ -367,9 +368,11 @@ func (s *Server) setupRoutes() {
 		code := c.Query("code")
 		state := c.Query("state")
 		errStr := c.Query("error")
+		if errStr == "" {
+			errStr = c.Query("error_description")
+		}
 		if state != "" {
-			file := fmt.Sprintf("%s/.oauth-codex-%s.oauth", s.cfg.AuthDir, state)
-			_ = os.WriteFile(file, []byte(fmt.Sprintf(`{"code":"%s","state":"%s","error":"%s"}`, code, state, errStr)), 0o600)
+			_, _ = managementHandlers.WriteOAuthCallbackFileForPendingSession(s.cfg.AuthDir, "codex", state, code, errStr)
 		}
 		c.Header("Content-Type", "text/html; charset=utf-8")
 		c.String(http.StatusOK, oauthCallbackSuccessHTML)
@@ -379,9 +382,11 @@ func (s *Server) setupRoutes() {
 		code := c.Query("code")
 		state := c.Query("state")
 		errStr := c.Query("error")
+		if errStr == "" {
+			errStr = c.Query("error_description")
+		}
 		if state != "" {
-			file := fmt.Sprintf("%s/.oauth-gemini-%s.oauth", s.cfg.AuthDir, state)
-			_ = os.WriteFile(file, []byte(fmt.Sprintf(`{"code":"%s","state":"%s","error":"%s"}`, code, state, errStr)), 0o600)
+			_, _ = managementHandlers.WriteOAuthCallbackFileForPendingSession(s.cfg.AuthDir, "gemini", state, code, errStr)
 		}
 		c.Header("Content-Type", "text/html; charset=utf-8")
 		c.String(http.StatusOK, oauthCallbackSuccessHTML)
@@ -391,9 +396,11 @@ func (s *Server) setupRoutes() {
 		code := c.Query("code")
 		state := c.Query("state")
 		errStr := c.Query("error")
+		if errStr == "" {
+			errStr = c.Query("error_description")
+		}
 		if state != "" {
-			file := fmt.Sprintf("%s/.oauth-iflow-%s.oauth", s.cfg.AuthDir, state)
-			_ = os.WriteFile(file, []byte(fmt.Sprintf(`{"code":"%s","state":"%s","error":"%s"}`, code, state, errStr)), 0o600)
+			_, _ = managementHandlers.WriteOAuthCallbackFileForPendingSession(s.cfg.AuthDir, "iflow", state, code, errStr)
 		}
 		c.Header("Content-Type", "text/html; charset=utf-8")
 		c.String(http.StatusOK, oauthCallbackSuccessHTML)
@@ -403,9 +410,11 @@ func (s *Server) setupRoutes() {
 		code := c.Query("code")
 		state := c.Query("state")
 		errStr := c.Query("error")
+		if errStr == "" {
+			errStr = c.Query("error_description")
+		}
 		if state != "" {
-			file := fmt.Sprintf("%s/.oauth-antigravity-%s.oauth", s.cfg.AuthDir, state)
-			_ = os.WriteFile(file, []byte(fmt.Sprintf(`{"code":"%s","state":"%s","error":"%s"}`, code, state, errStr)), 0o600)
+			_, _ = managementHandlers.WriteOAuthCallbackFileForPendingSession(s.cfg.AuthDir, "antigravity", state, code, errStr)
 		}
 		c.Header("Content-Type", "text/html; charset=utf-8")
 		c.String(http.StatusOK, oauthCallbackSuccessHTML)
@@ -577,6 +586,7 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/qwen-auth-url", s.mgmt.RequestQwenToken)
 		mgmt.GET("/iflow-auth-url", s.mgmt.RequestIFlowToken)
 		mgmt.POST("/iflow-auth-url", s.mgmt.RequestIFlowCookieToken)
+		mgmt.POST("/oauth-callback", s.mgmt.PostOAuthCallback)
 		mgmt.GET("/get-auth-status", s.mgmt.GetAuthStatus)
 	}
 }
--- a/internal/misc/codex_instructions.go
+++ b/internal/misc/codex_instructions.go
@@ -20,6 +20,7 @@ func CodexInstructionsForModel(modelName, systemInstructions string) (bool, stri
 	lastCodexMaxPrompt := ""
 	last51Prompt := ""
 	last52Prompt := ""
+	last52CodexPrompt := ""
 	// lastReviewPrompt := ""
 	for _, entry := range entries {
 		content, _ := codexInstructionsDir.ReadFile("codex_instructions/" + entry.Name())
@@ -36,12 +37,16 @@ func CodexInstructionsForModel(modelName, systemInstructions string) (bool, stri
 			last51Prompt = string(content)
 		} else if strings.HasPrefix(entry.Name(), "gpt_5_2_prompt.md") {
 			last52Prompt = string(content)
+		} else if strings.HasPrefix(entry.Name(), "gpt-5.2-codex_prompt.md") {
+			last52CodexPrompt = string(content)
 		} else if strings.HasPrefix(entry.Name(), "review_prompt.md") {
 			// lastReviewPrompt = string(content)
 		}
 	}
 	if strings.Contains(modelName, "codex-max") {
 		return false, lastCodexMaxPrompt
+	} else if strings.Contains(modelName, "5.2-codex") {
+		return false, last52CodexPrompt
 	} else if strings.Contains(modelName, "codex") {
 		return false, lastCodexPrompt
 	} else if strings.Contains(modelName, "5.1") {
--- a/internal/misc/codex_instructions/gpt-5.2-codex_prompt.md-001-f084e5264b1b0ae9eb8c63c950c0953f40966fed
+++ b/internal/misc/codex_instructions/gpt-5.2-codex_prompt.md-001-f084e5264b1b0ae9eb8c63c950c0953f40966fed
@@ -0,0 +1,117 @@
+You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.
+
+## General
+
+- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
+
+## Editing constraints
+
+- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.
+- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like "Assigns the value to the variable", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.
+- Try to use apply_patch for single file edits, but it is fine to explore other options to make the edit if it does not work well. Do not use apply_patch for changes that are auto-generated (i.e. generating package.json or running a lint or format command like gofmt) or when scripting is more efficient (such as search and replacing a string across a codebase).
+- You may be in a dirty git worktree.
+    * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.
+    * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.
+    * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.
+    * If the changes are in unrelated files, just ignore them and don't revert them.
+- Do not amend a commit unless explicitly requested to do so.
+- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
+- **NEVER** use destructive commands like `git reset --hard` or `git checkout --` unless specifically requested or approved by the user.
+
+## Plan tool
+
+When using the planning tool:
+- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).
+- Do not make single-step plans.
+- Once you have made a plan, update it after completing any of the sub-tasks that you shared in the plan.
+
+## Codex CLI harness, sandboxing, and approvals
+
+The Codex CLI harness supports several different configurations for sandboxing and escalation approvals that the user can choose from.
+
+Filesystem sandboxing defines which files can be read or written. The options for `sandbox_mode` are:
+- **read-only**: The sandbox only permits reading files.
+- **workspace-write**: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval.
+- **danger-full-access**: No filesystem sandboxing - all commands are permitted.
+
+Network sandboxing defines whether network can be accessed without approval. Options for `network_access` are:
+- **restricted**: Requires approval
+- **enabled**: No approval needed
+
+Approvals are your mechanism to get user consent to run shell commands without the sandbox. Possible configuration options for `approval_policy` are:
+- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.
+- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.
+- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.)
+- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding.
+
+When you are running with `approval_policy == on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval:
+- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /var)
+- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
+- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
+- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval. ALWAYS proceed to use the `sandbox_permissions` and `justification` parameters - do not message the user before requesting approval for the command.
+- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
+- (for all of these, you should weigh alternative paths that do not require approval)
+
+When `sandbox_mode` is set to read-only, you'll need to request approval for any command that isn't a read.
+
+You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure.
+
+Although approval requests introduce friction for the user because your work is paused until the user responds, you should leverage them when necessary to accomplish important work. If completing the task requires escalated permissions, do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless `approval_policy` is set to "never", in which case never ask for approvals.
+
+When requesting approval to execute a command that will require escalated privileges:
+  - Provide the `sandbox_permissions` parameter with the value `"require_escalated"`
+  - Include a short, one-sentence explanation for why you need escalated permissions in the `justification` parameter
+
+## Special user requests
+
+- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.
+- If the user asks for a "review", default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.
+
+## Frontend tasks
+When doing frontend design tasks, avoid collapsing into "AI slop" or safe, average-looking layouts.
+Aim for interfaces that feel intentional, bold, and a bit surprising.
+- Typography: Use expressive, purposeful fonts and avoid default stacks (Inter, Roboto, Arial, system).
+- Color & Look: Choose a clear visual direction; define CSS variables; avoid purple-on-white defaults. No purple bias or dark mode bias.
+- Motion: Use a few meaningful animations (page-load, staggered reveals) instead of generic micro-motions.
+- Background: Don't rely on flat, single-color backgrounds; use gradients, shapes, or subtle patterns to build atmosphere.
+- Overall: Avoid boilerplate layouts and interchangeable UI patterns. Vary themes, type families, and visual languages across outputs.
+- Ensure the page loads properly on both desktop and mobile
+
+Exception: If working within an existing website or design system, preserve the established patterns, structure, and visual language.
+
+## Presenting your work and final message
+
+You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
+
+- Default: be very concise; friendly coding teammate tone.
+- Ask only when needed; suggest ideas; mirror the user's style.
+- For substantial work, summarize clearly; follow final‑answer formatting.
+- Skip heavy formatting for simple confirmations.
+- Don't dump large files you've written; reference paths only.
+- No "save/copy this file" - User is on the same machine.
+- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.
+- For code changes:
+  * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with "summary", just jump right in.
+  * If there are natural next steps the user may want to take, suggest them at the end of your response. Do not make suggestions if there are no natural next steps.
+  * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.
+- The user does not see command execution outputs. When asked to show the output of a command (e.g. `git show`), relay the important details in your answer or summarize the key lines so the user understands the result.
+
+### Final answer structure and style guidelines
+
+- Plain text; CLI handles styling. Use structure only when it helps scanability.
+- Headers: optional; short Title Case (1-3 words) wrapped in **…**; no blank line before the first bullet; add only if they truly help.
+- Bullets: use - ; merge related points; keep to one line when possible; 4–6 per list ordered by importance; keep phrasing consistent.
+- Monospace: backticks for commands/paths/env vars/code ids and inline examples; use for literal keyword bullets; never combine with **.
+- Code samples or multi-line snippets should be wrapped in fenced code blocks; include an info string as often as possible.
+- Structure: group related bullets; order sections general → specific → supporting; for subsections, start with a bolded keyword bullet, then items; match complexity to the task.
+- Tone: collaborative, concise, factual; present tense, active voice; self‑contained; no "above/below"; parallel wording.
+- Don'ts: no nested bullets/hierarchies; no ANSI codes; don't cram unrelated keywords; keep keyword lists short—wrap/reformat if long; avoid naming formatting styles in answers.
+- Adaptation: code explanations → precise, structured with code refs; simple tasks → lead with outcome; big changes → logical walkthrough + rationale + next actions; casual one-offs → plain sentences, no headers/bullets.
+- File References: When referencing files in your response follow the below rules:
+  * Use inline code to make file paths clickable.
+  * Each reference should have a stand alone path. Even if it's the same file.
+  * Accepted: absolute, workspace‑relative, a/ or b/ diff prefixes, or bare filename/suffix.
+  * Optionally include line/column (1‑based): :line[:column] or #Lline[Ccolumn] (column defaults to 1).
+  * Do not use URIs like file://, vscode://, or https://.
+  * Do not provide a range of lines.
+  * Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -160,7 +160,7 @@ func GetGeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
 		},
 		{
 			ID:                         "gemini-3-pro-image-preview",
@@ -175,7 +175,7 @@ func GetGeminiModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
 		},
 	}
 }
@@ -240,7 +240,22 @@ func GetGeminiVertexModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
 		},
 		{
 			ID:                         "gemini-3-pro-image-preview",
@@ -255,7 +270,7 @@ func GetGeminiVertexModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
 		},
 	}
 }
@@ -317,11 +332,26 @@ func GetGeminiCLIModels() []*ModelInfo {
 			Name:                       "models/gemini-3-pro-preview",
 			Version:                    "3.0",
 			DisplayName:                "Gemini 3 Pro Preview",
-			Description:                "Gemini 3 Pro Preview",
+			Description:                "Our most intelligent model with SOTA reasoning and multimodal understanding, and powerful agentic and vibe coding capabilities",
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
 		},
 	}
 }
@@ -387,7 +417,22 @@ func GetAIStudioModels() []*ModelInfo {
 			InputTokenLimit:            1048576,
 			OutputTokenLimit:           65536,
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
-			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}},
+		},
+		{
+			ID:                         "gemini-3-flash-preview",
+			Object:                     "model",
+			Created:                    1765929600,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-flash-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Flash Preview",
+			Description:                "Our most intelligent model built for speed, combining frontier intelligence with superior search and grounding.",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}},
 		},
 		{
 			ID:                         "gemini-pro-latest",
@@ -582,6 +627,20 @@ func GetOpenAIModels() []*ModelInfo {
 			SupportedParameters: []string{"tools"},
 			Thinking:            &ThinkingSupport{Levels: []string{"none", "low", "medium", "high", "xhigh"}},
 		},
+		{
+			ID:                  "gpt-5.2-codex",
+			Object:              "model",
+			Created:             1765440000,
+			OwnedBy:             "openai",
+			Type:                "openai",
+			Version:             "gpt-5.2",
+			DisplayName:         "GPT 5.2 Codex",
+			Description:         "Stable version of GPT 5.2 Codex, The best model for coding and agentic tasks across domains.",
+			ContextLength:       400000,
+			MaxCompletionTokens: 128000,
+			SupportedParameters: []string{"tools"},
+			Thinking:            &ThinkingSupport{Levels: []string{"low", "medium", "high", "xhigh"}},
+		},
 	}
 }

@@ -698,8 +757,9 @@ func GetAntigravityModelConfig() map[string]*AntigravityModelConfig {
 		"gemini-2.5-flash":                        {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash"},
 		"gemini-2.5-flash-lite":                   {Thinking: &ThinkingSupport{Min: 0, Max: 24576, ZeroAllowed: true, DynamicAllowed: true}, Name: "models/gemini-2.5-flash-lite"},
 		"gemini-2.5-computer-use-preview-10-2025": {Name: "models/gemini-2.5-computer-use-preview-10-2025"},
-		"gemini-3-pro-preview":                    {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-preview"},
-		"gemini-3-pro-image-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true}, Name: "models/gemini-3-pro-image-preview"},
+		"gemini-3-pro-preview":                    {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-preview"},
+		"gemini-3-pro-image-preview":              {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"low", "high"}}, Name: "models/gemini-3-pro-image-preview"},
+		"gemini-3-flash-preview":                  {Thinking: &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true, Levels: []string{"minimal", "low", "medium", "high"}}, Name: "models/gemini-3-flash-preview"},
 		"gemini-claude-sonnet-4-5-thinking":       {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 		"gemini-claude-opus-4-5-thinking":         {Thinking: &ThinkingSupport{Min: 1024, Max: 200000, ZeroAllowed: false, DynamicAllowed: true}, MaxCompletionTokens: 64000},
 	}
--- a/internal/runtime/executor/aistudio_executor.go
+++ b/internal/runtime/executor/aistudio_executor.go
@@ -323,8 +323,9 @@ func (e *AIStudioExecutor) translateRequest(req cliproxyexecutor.Request, opts c
 	to := sdktranslator.FromString("gemini")
 	payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), stream)
 	payload = ApplyThinkingMetadata(payload, req.Metadata, req.Model)
+	payload = util.ApplyGemini3ThinkingLevelFromMetadata(req.Model, req.Metadata, payload)
 	payload = util.ApplyDefaultThinkingIfNeeded(req.Model, payload)
-	payload = util.ConvertThinkingLevelToBudget(payload)
+	payload = util.ConvertThinkingLevelToBudget(payload, req.Model)
 	payload = util.NormalizeGeminiThinkingBudget(req.Model, payload)
 	payload = util.StripThinkingConfigIfUnsupported(req.Model, payload)
 	payload = fixGeminiImageAspectRatio(req.Model, payload)
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -32,15 +32,16 @@ import (
 const (
 	antigravityBaseURLDaily = "https://daily-cloudcode-pa.sandbox.googleapis.com"
 	// antigravityBaseURLAutopush     = "https://autopush-cloudcode-pa.sandbox.googleapis.com"
-	antigravityBaseURLProd  = "https://cloudcode-pa.googleapis.com"
-	antigravityStreamPath   = "/v1internal:streamGenerateContent"
-	antigravityGeneratePath = "/v1internal:generateContent"
-	antigravityModelsPath   = "/v1internal:fetchAvailableModels"
-	antigravityClientID     = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
-	antigravityClientSecret = "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"
-	defaultAntigravityAgent = "antigravity/1.11.5 windows/amd64"
-	antigravityAuthType     = "antigravity"
-	refreshSkew             = 3000 * time.Second
+	antigravityBaseURLProd     = "https://cloudcode-pa.googleapis.com"
+	antigravityCountTokensPath = "/v1internal:countTokens"
+	antigravityStreamPath      = "/v1internal:streamGenerateContent"
+	antigravityGeneratePath    = "/v1internal:generateContent"
+	antigravityModelsPath      = "/v1internal:fetchAvailableModels"
+	antigravityClientID        = "1071006060591-tmhssin2h21lcre235vtolojh4g403ep.apps.googleusercontent.com"
+	antigravityClientSecret    = "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"
+	defaultAntigravityAgent    = "antigravity/1.11.5 windows/amd64"
+	antigravityAuthType        = "antigravity"
+	refreshSkew                = 3000 * time.Second
 )

 var randSource = rand.New(rand.NewSource(time.Now().UnixNano()))
@@ -89,6 +90,7 @@ func (e *AntigravityExecutor) Execute(ctx context.Context, auth *cliproxyauth.Au
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)

 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
 	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated)

@@ -182,6 +184,7 @@ func (e *AntigravityExecutor) executeClaudeNonStream(ctx context.Context, auth *
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
 	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated)

@@ -514,6 +517,7 @@ func (e *AntigravityExecutor) ExecuteStream(ctx context.Context, auth *cliproxya
 	translated := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)

 	translated = applyThinkingMetadataCLI(translated, req.Metadata, req.Model)
+	translated = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, translated)
 	translated = util.ApplyDefaultThinkingIfNeededCLI(req.Model, translated)
 	translated = normalizeAntigravityThinking(req.Model, translated)

@@ -646,9 +650,131 @@ func (e *AntigravityExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Au
 	return updated, nil
 }

-// CountTokens counts tokens for the given request (not supported for Antigravity).
-func (e *AntigravityExecutor) CountTokens(context.Context, *cliproxyauth.Auth, cliproxyexecutor.Request, cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
-	return cliproxyexecutor.Response{}, statusErr{code: http.StatusNotImplemented, msg: "count tokens not supported"}
+// CountTokens counts tokens for the given request using the Antigravity API.
+func (e *AntigravityExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (cliproxyexecutor.Response, error) {
+	token, updatedAuth, errToken := e.ensureAccessToken(ctx, auth)
+	if errToken != nil {
+		return cliproxyexecutor.Response{}, errToken
+	}
+	if updatedAuth != nil {
+		auth = updatedAuth
+	}
+	if strings.TrimSpace(token) == "" {
+		return cliproxyexecutor.Response{}, statusErr{code: http.StatusUnauthorized, msg: "missing access token"}
+	}
+
+	from := opts.SourceFormat
+	to := sdktranslator.FromString("antigravity")
+	respCtx := context.WithValue(ctx, "alt", opts.Alt)
+
+	baseURLs := antigravityBaseURLFallbackOrder(auth)
+	httpClient := newProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
+
+	var authID, authLabel, authType, authValue string
+	if auth != nil {
+		authID = auth.ID
+		authLabel = auth.Label
+		authType, authValue = auth.AccountInfo()
+	}
+
+	var lastStatus int
+	var lastBody []byte
+	var lastErr error
+
+	for idx, baseURL := range baseURLs {
+		payload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
+		payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
+		payload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, payload)
+		payload = normalizeAntigravityThinking(req.Model, payload)
+		payload = deleteJSONField(payload, "project")
+		payload = deleteJSONField(payload, "model")
+		payload = deleteJSONField(payload, "request.safetySettings")
+
+		base := strings.TrimSuffix(baseURL, "/")
+		if base == "" {
+			base = buildBaseURL(auth)
+		}
+
+		var requestURL strings.Builder
+		requestURL.WriteString(base)
+		requestURL.WriteString(antigravityCountTokensPath)
+		if opts.Alt != "" {
+			requestURL.WriteString("?$alt=")
+			requestURL.WriteString(url.QueryEscape(opts.Alt))
+		}
+
+		httpReq, errReq := http.NewRequestWithContext(ctx, http.MethodPost, requestURL.String(), bytes.NewReader(payload))
+		if errReq != nil {
+			return cliproxyexecutor.Response{}, errReq
+		}
+		httpReq.Header.Set("Content-Type", "application/json")
+		httpReq.Header.Set("Authorization", "Bearer "+token)
+		httpReq.Header.Set("User-Agent", resolveUserAgent(auth))
+		httpReq.Header.Set("Accept", "application/json")
+		if host := resolveHost(base); host != "" {
+			httpReq.Host = host
+		}
+
+		recordAPIRequest(ctx, e.cfg, upstreamRequestLog{
+			URL:       requestURL.String(),
+			Method:    http.MethodPost,
+			Headers:   httpReq.Header.Clone(),
+			Body:      payload,
+			Provider:  e.Identifier(),
+			AuthID:    authID,
+			AuthLabel: authLabel,
+			AuthType:  authType,
+			AuthValue: authValue,
+		})
+
+		httpResp, errDo := httpClient.Do(httpReq)
+		if errDo != nil {
+			recordAPIResponseError(ctx, e.cfg, errDo)
+			lastStatus = 0
+			lastBody = nil
+			lastErr = errDo
+			if idx+1 < len(baseURLs) {
+				log.Debugf("antigravity executor: request error on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+				continue
+			}
+			return cliproxyexecutor.Response{}, errDo
+		}
+
+		recordAPIResponseMetadata(ctx, e.cfg, httpResp.StatusCode, httpResp.Header.Clone())
+		bodyBytes, errRead := io.ReadAll(httpResp.Body)
+		if errClose := httpResp.Body.Close(); errClose != nil {
+			log.Errorf("antigravity executor: close response body error: %v", errClose)
+		}
+		if errRead != nil {
+			recordAPIResponseError(ctx, e.cfg, errRead)
+			return cliproxyexecutor.Response{}, errRead
+		}
+		appendAPIResponseChunk(ctx, e.cfg, bodyBytes)
+
+		if httpResp.StatusCode >= http.StatusOK && httpResp.StatusCode < http.StatusMultipleChoices {
+			count := gjson.GetBytes(bodyBytes, "totalTokens").Int()
+			translated := sdktranslator.TranslateTokenCount(respCtx, to, from, count, bodyBytes)
+			return cliproxyexecutor.Response{Payload: []byte(translated)}, nil
+		}
+
+		lastStatus = httpResp.StatusCode
+		lastBody = append([]byte(nil), bodyBytes...)
+		lastErr = nil
+		if httpResp.StatusCode == http.StatusTooManyRequests && idx+1 < len(baseURLs) {
+			log.Debugf("antigravity executor: rate limited on base url %s, retrying with fallback base url: %s", baseURL, baseURLs[idx+1])
+			continue
+		}
+		return cliproxyexecutor.Response{}, statusErr{code: httpResp.StatusCode, msg: string(bodyBytes)}
+	}
+
+	switch {
+	case lastStatus != 0:
+		return cliproxyexecutor.Response{}, statusErr{code: lastStatus, msg: string(lastBody)}
+	case lastErr != nil:
+		return cliproxyexecutor.Response{}, lastErr
+	default:
+		return cliproxyexecutor.Response{}, statusErr{code: http.StatusServiceUnavailable, msg: "antigravity executor: no base url available"}
+	}
 }

 // FetchAntigravityModels retrieves available models using the supplied auth.
@@ -1114,6 +1240,8 @@ func modelName2Alias(modelName string) string {
 		return "gemini-3-pro-image-preview"
 	case "gemini-3-pro-high":
 		return "gemini-3-pro-preview"
+	case "gemini-3-flash":
+		return "gemini-3-flash-preview"
 	case "claude-sonnet-4-5":
 		return "gemini-claude-sonnet-4-5"
 	case "claude-sonnet-4-5-thinking":
@@ -1135,6 +1263,8 @@ func alias2ModelName(modelName string) string {
 		return "gemini-3-pro-image"
 	case "gemini-3-pro-preview":
 		return "gemini-3-pro-high"
+	case "gemini-3-flash-preview":
+		return "gemini-3-flash"
 	case "gemini-claude-sonnet-4-5":
 		return "claude-sonnet-4-5"
 	case "gemini-claude-sonnet-4-5-thinking":
--- a/internal/runtime/executor/gemini_cli_executor.go
+++ b/internal/runtime/executor/gemini_cli_executor.go
@@ -79,6 +79,7 @@ func (e *GeminiCLIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), false)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
 	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
@@ -217,6 +218,7 @@ func (e *GeminiCLIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyaut
 	to := sdktranslator.FromString("gemini-cli")
 	basePayload := sdktranslator.TranslateRequest(from, to, req.Model, bytes.Clone(req.Payload), true)
 	basePayload = applyThinkingMetadataCLI(basePayload, req.Metadata, req.Model)
+	basePayload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, basePayload)
 	basePayload = util.ApplyDefaultThinkingIfNeededCLI(req.Model, basePayload)
 	basePayload = util.NormalizeGeminiCLIThinkingBudget(req.Model, basePayload)
 	basePayload = util.StripThinkingConfigIfUnsupported(req.Model, basePayload)
@@ -418,6 +420,7 @@ func (e *GeminiCLIExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.
 	for _, attemptModel := range models {
 		payload := sdktranslator.TranslateRequest(from, to, attemptModel, bytes.Clone(req.Payload), false)
 		payload = applyThinkingMetadataCLI(payload, req.Metadata, req.Model)
+		payload = util.ApplyGemini3ThinkingLevelFromMetadataCLI(req.Model, req.Metadata, payload)
 		payload = deleteJSONField(payload, "project")
 		payload = deleteJSONField(payload, "model")
 		payload = deleteJSONField(payload, "request.safetySettings")
--- a/internal/translator/gemini/claude/gemini_claude_response.go
+++ b/internal/translator/gemini/claude/gemini_claude_response.go
@@ -25,7 +25,7 @@ type Params struct {
 	HasFirstResponse bool
 	ResponseType     int
 	ResponseIndex    int
-	HasContent bool // Tracks whether any content (text, thinking, or tool use) has been output
+	HasContent       bool // Tracks whether any content (text, thinking, or tool use) has been output
 }

 // toolUseIDCounter provides a process-wide unique counter for tool use identifiers.
--- a/internal/util/gemini_thinking.go
+++ b/internal/util/gemini_thinking.go
@@ -1,6 +1,7 @@
 package util

 import (
+	"regexp"
 	"strings"

 	"github.com/tidwall/gjson"
@@ -13,6 +14,44 @@ const (
 	GeminiOriginalModelMetadataKey   = "gemini_original_model"
 )

+// Gemini model family detection patterns
+var (
+	gemini3Pattern      = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]`)
+	gemini3ProPattern   = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]pro`)
+	gemini3FlashPattern = regexp.MustCompile(`(?i)^gemini[_-]?3[_-]flash`)
+	gemini25Pattern     = regexp.MustCompile(`(?i)^gemini[_-]?2\.5[_-]`)
+)
+
+// IsGemini3Model returns true if the model is a Gemini 3 family model.
+// Gemini 3 models should use thinkingLevel (string) instead of thinkingBudget (number).
+func IsGemini3Model(model string) bool {
+	return gemini3Pattern.MatchString(model)
+}
+
+// IsGemini3ProModel returns true if the model is a Gemini 3 Pro variant.
+// Gemini 3 Pro supports thinkingLevel: "low", "high" (default: "high")
+func IsGemini3ProModel(model string) bool {
+	return gemini3ProPattern.MatchString(model)
+}
+
+// IsGemini3FlashModel returns true if the model is a Gemini 3 Flash variant.
+// Gemini 3 Flash supports thinkingLevel: "minimal", "low", "medium", "high" (default: "high")
+func IsGemini3FlashModel(model string) bool {
+	return gemini3FlashPattern.MatchString(model)
+}
+
+// IsGemini25Model returns true if the model is a Gemini 2.5 family model.
+// Gemini 2.5 models should use thinkingBudget (number).
+func IsGemini25Model(model string) bool {
+	return gemini25Pattern.MatchString(model)
+}
+
+// Gemini3ProThinkingLevels are the valid thinkingLevel values for Gemini 3 Pro models.
+var Gemini3ProThinkingLevels = []string{"low", "high"}
+
+// Gemini3FlashThinkingLevels are the valid thinkingLevel values for Gemini 3 Flash models.
+var Gemini3FlashThinkingLevels = []string{"minimal", "low", "medium", "high"}
+
 func ApplyGeminiThinkingConfig(body []byte, budget *int, includeThoughts *bool) []byte {
 	if budget == nil && includeThoughts == nil {
 		return body
@@ -69,10 +108,141 @@ func ApplyGeminiCLIThinkingConfig(body []byte, budget *int, includeThoughts *boo
 	return updated
 }

+// ApplyGeminiThinkingLevel applies thinkingLevel config for Gemini 3 models.
+// For standard Gemini API format (generationConfig.thinkingConfig path).
+// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget.
+func ApplyGeminiThinkingLevel(body []byte, level string, includeThoughts *bool) []byte {
+	if level == "" && includeThoughts == nil {
+		return body
+	}
+	updated := body
+	if level != "" {
+		valuePath := "generationConfig.thinkingConfig.thinkingLevel"
+		rewritten, err := sjson.SetBytes(updated, valuePath, level)
+		if err == nil {
+			updated = rewritten
+		}
+	}
+	// Default to including thoughts when a level is set but no explicit include flag is provided.
+	incl := includeThoughts
+	if incl == nil && level != "" {
+		defaultInclude := true
+		incl = &defaultInclude
+	}
+	if incl != nil {
+		valuePath := "generationConfig.thinkingConfig.includeThoughts"
+		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
+		if err == nil {
+			updated = rewritten
+		}
+	}
+	return updated
+}
+
+// ApplyGeminiCLIThinkingLevel applies thinkingLevel config for Gemini 3 models.
+// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
+// Per Google's documentation, Gemini 3 models should use thinkingLevel instead of thinkingBudget.
+func ApplyGeminiCLIThinkingLevel(body []byte, level string, includeThoughts *bool) []byte {
+	if level == "" && includeThoughts == nil {
+		return body
+	}
+	updated := body
+	if level != "" {
+		valuePath := "request.generationConfig.thinkingConfig.thinkingLevel"
+		rewritten, err := sjson.SetBytes(updated, valuePath, level)
+		if err == nil {
+			updated = rewritten
+		}
+	}
+	// Default to including thoughts when a level is set but no explicit include flag is provided.
+	incl := includeThoughts
+	if incl == nil && level != "" {
+		defaultInclude := true
+		incl = &defaultInclude
+	}
+	if incl != nil {
+		valuePath := "request.generationConfig.thinkingConfig.includeThoughts"
+		rewritten, err := sjson.SetBytes(updated, valuePath, *incl)
+		if err == nil {
+			updated = rewritten
+		}
+	}
+	return updated
+}
+
+// ValidateGemini3ThinkingLevel validates that the thinkingLevel is valid for the Gemini 3 model variant.
+// Returns the validated level (trimmed and lowercased) and true if valid, or empty string and false if invalid.
+func ValidateGemini3ThinkingLevel(model, level string) (string, bool) {
+	if level == "" {
+		return "", false
+	}
+	normalized := strings.ToLower(strings.TrimSpace(level))
+
+	var validLevels []string
+	if IsGemini3ProModel(model) {
+		validLevels = Gemini3ProThinkingLevels
+	} else if IsGemini3FlashModel(model) {
+		validLevels = Gemini3FlashThinkingLevels
+	} else if IsGemini3Model(model) {
+		// Unknown Gemini 3 variant - allow all levels as fallback
+		validLevels = Gemini3FlashThinkingLevels
+	} else {
+		return "", false
+	}
+
+	for _, valid := range validLevels {
+		if normalized == valid {
+			return normalized, true
+		}
+	}
+	return "", false
+}
+
+// ThinkingBudgetToGemini3Level converts a thinkingBudget to a thinkingLevel for Gemini 3 models.
+// This provides backward compatibility when thinkingBudget is provided for Gemini 3 models.
+// Returns the thinkingLevel and true for any Gemini 3 model; returns an empty string and false for other models.
+func ThinkingBudgetToGemini3Level(model string, budget int) (string, bool) {
+	if !IsGemini3Model(model) {
+		return "", false
+	}
+
+	// Map budget to level based on Google's documentation
+	// Gemini 3 Pro: "low", "high" (default: "high")
+	// Gemini 3 Flash: "minimal", "low", "medium", "high" (default: "high")
+	switch {
+	case budget == -1:
+		// Dynamic budget maps to "high" (API default)
+		return "high", true
+	case budget == 0:
+		// Zero budget - Gemini 3 doesn't support disabling thinking
+		// Map to lowest available level
+		if IsGemini3FlashModel(model) {
+			return "minimal", true
+		}
+		return "low", true
+	case budget > 0 && budget <= 512:
+		if IsGemini3FlashModel(model) {
+			return "minimal", true
+		}
+		return "low", true
+	case budget <= 1024:
+		return "low", true
+	case budget <= 8192:
+		if IsGemini3FlashModel(model) {
+			return "medium", true
+		}
+		return "low", true // Pro doesn't have medium, use low
+	default:
+		return "high", true
+	}
+}
+
 // modelsWithDefaultThinking lists models that should have thinking enabled by default
 // when no explicit thinkingConfig is provided.
 var modelsWithDefaultThinking = map[string]bool{
-	"gemini-3-pro-preview": true,
+	"gemini-3-pro-preview":       true,
+	"gemini-3-pro-image-preview": true,
+	// "gemini-3-flash-preview":     true,
 }

 // ModelHasDefaultThinking returns true if the model should have thinking enabled by default.
@@ -83,6 +253,7 @@ func ModelHasDefaultThinking(model string) bool {
 // ApplyDefaultThinkingIfNeeded injects default thinkingConfig for models that require it.
 // For standard Gemini API format (generationConfig.thinkingConfig path).
 // Returns the modified body if thinkingConfig was added, otherwise returns the original.
+// For Gemini 3 models, only includeThoughts is set; no thinkingBudget or thinkingLevel is injected.
 func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
 	if !ModelHasDefaultThinking(model) {
 		return body
@@ -90,14 +261,59 @@ func ApplyDefaultThinkingIfNeeded(model string, body []byte) []byte {
 	if gjson.GetBytes(body, "generationConfig.thinkingConfig").Exists() {
 		return body
 	}
+	// Gemini 3 models use thinkingLevel instead of thinkingBudget
+	if IsGemini3Model(model) {
+		// Don't set a default - let the API use its dynamic default ("high")
+		// Only set includeThoughts
+		updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.includeThoughts", true)
+		return updated
+	}
+	// Gemini 2.5 and other models use thinkingBudget
 	updated, _ := sjson.SetBytes(body, "generationConfig.thinkingConfig.thinkingBudget", -1)
 	updated, _ = sjson.SetBytes(updated, "generationConfig.thinkingConfig.include_thoughts", true)
 	return updated
 }

+// ApplyGemini3ThinkingLevelFromMetadata applies thinkingLevel from metadata for Gemini 3 models.
+// For standard Gemini API format (generationConfig.thinkingConfig path).
+// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
+func ApplyGemini3ThinkingLevelFromMetadata(model string, metadata map[string]any, body []byte) []byte {
+	if !IsGemini3Model(model) {
+		return body
+	}
+	effort, ok := ReasoningEffortFromMetadata(metadata)
+	if !ok || effort == "" {
+		return body
+	}
+	// Validate and apply the thinkingLevel
+	if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
+		return ApplyGeminiThinkingLevel(body, level, nil)
+	}
+	return body
+}
+
+// ApplyGemini3ThinkingLevelFromMetadataCLI applies thinkingLevel from metadata for Gemini 3 models.
+// For Gemini CLI API format (request.generationConfig.thinkingConfig path).
+// This handles the case where reasoning_effort is specified via model name suffix (e.g., model(minimal)).
+func ApplyGemini3ThinkingLevelFromMetadataCLI(model string, metadata map[string]any, body []byte) []byte {
+	if !IsGemini3Model(model) {
+		return body
+	}
+	effort, ok := ReasoningEffortFromMetadata(metadata)
+	if !ok || effort == "" {
+		return body
+	}
+	// Validate and apply the thinkingLevel
+	if level, valid := ValidateGemini3ThinkingLevel(model, effort); valid {
+		return ApplyGeminiCLIThinkingLevel(body, level, nil)
+	}
+	return body
+}
+
 // ApplyDefaultThinkingIfNeededCLI injects default thinkingConfig for models that require it.
 // For Gemini CLI API format (request.generationConfig.thinkingConfig path).
 // Returns the modified body if thinkingConfig was added, otherwise returns the original.
+// For Gemini 3 models, only includeThoughts is set; no thinkingBudget or thinkingLevel is injected.
 func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
 	if !ModelHasDefaultThinking(model) {
 		return body
@@ -105,6 +321,14 @@ func ApplyDefaultThinkingIfNeededCLI(model string, body []byte) []byte {
 	if gjson.GetBytes(body, "request.generationConfig.thinkingConfig").Exists() {
 		return body
 	}
+	// Gemini 3 models use thinkingLevel instead of thinkingBudget
+	if IsGemini3Model(model) {
+		// Don't set a default - let the API use its dynamic default ("high")
+		// Only set includeThoughts
+		updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.includeThoughts", true)
+		return updated
+	}
+	// Gemini 2.5 and other models use thinkingBudget
 	updated, _ := sjson.SetBytes(body, "request.generationConfig.thinkingConfig.thinkingBudget", -1)
 	updated, _ = sjson.SetBytes(updated, "request.generationConfig.thinkingConfig.include_thoughts", true)
 	return updated
@@ -128,12 +352,29 @@ func StripThinkingConfigIfUnsupported(model string, body []byte) []byte {

 // NormalizeGeminiThinkingBudget normalizes the thinkingBudget value in a standard Gemini
 // request body (generationConfig.thinkingConfig.thinkingBudget path).
+// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation.
 func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {
 	const budgetPath = "generationConfig.thinkingConfig.thinkingBudget"
+	const levelPath = "generationConfig.thinkingConfig.thinkingLevel"
+
 	budget := gjson.GetBytes(body, budgetPath)
 	if !budget.Exists() {
 		return body
 	}
+
+	// For Gemini 3 models, convert thinkingBudget to thinkingLevel
+	if IsGemini3Model(model) {
+		if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok {
+			updated, _ := sjson.SetBytes(body, levelPath, level)
+			updated, _ = sjson.DeleteBytes(updated, budgetPath)
+			return updated
+		}
+		// If conversion fails, just remove the budget (let API use default)
+		updated, _ := sjson.DeleteBytes(body, budgetPath)
+		return updated
+	}
+
+	// For Gemini 2.5 and other models, normalize the budget value
 	normalized := NormalizeThinkingBudget(model, int(budget.Int()))
 	updated, _ := sjson.SetBytes(body, budgetPath, normalized)
 	return updated
@@ -141,12 +382,29 @@ func NormalizeGeminiThinkingBudget(model string, body []byte) []byte {

 // NormalizeGeminiCLIThinkingBudget normalizes the thinkingBudget value in a Gemini CLI
 // request body (request.generationConfig.thinkingConfig.thinkingBudget path).
+// For Gemini 3 models, converts thinkingBudget to thinkingLevel per Google's documentation.
 func NormalizeGeminiCLIThinkingBudget(model string, body []byte) []byte {
 	const budgetPath = "request.generationConfig.thinkingConfig.thinkingBudget"
+	const levelPath = "request.generationConfig.thinkingConfig.thinkingLevel"
+
 	budget := gjson.GetBytes(body, budgetPath)
 	if !budget.Exists() {
 		return body
 	}
+
+	// For Gemini 3 models, convert thinkingBudget to thinkingLevel
+	if IsGemini3Model(model) {
+		if level, ok := ThinkingBudgetToGemini3Level(model, int(budget.Int())); ok {
+			updated, _ := sjson.SetBytes(body, levelPath, level)
+			updated, _ = sjson.DeleteBytes(updated, budgetPath)
+			return updated
+		}
+		// If conversion fails, just remove the budget (let API use default)
+		updated, _ := sjson.DeleteBytes(body, budgetPath)
+		return updated
+	}
+
+	// For Gemini 2.5 and other models, normalize the budget value
 	normalized := NormalizeThinkingBudget(model, int(budget.Int()))
 	updated, _ := sjson.SetBytes(body, budgetPath, normalized)
 	return updated
@@ -218,34 +476,42 @@ func ApplyReasoningEffortToGeminiCLI(body []byte, effort string) []byte {
 }

 // ConvertThinkingLevelToBudget checks for "generationConfig.thinkingConfig.thinkingLevel"
-// and converts it to "thinkingBudget".
-// "high" -> 32768
-// "low" -> 128
-// It removes "thinkingLevel" after conversion.
-func ConvertThinkingLevelToBudget(body []byte) []byte {
+// and converts it to "thinkingBudget" for every model that is not Gemini 3 (e.g. Gemini 2.5).
+// For Gemini 3 models, preserves thinkingLevel as-is (does not convert).
+// Mappings (level is matched case-insensitively):
+//   - "high" -> 32768
+//   - "medium" -> 8192
+//   - "low" -> 1024
+//   - "minimal" -> 512
+//
+// It removes "thinkingLevel" after conversion; an unknown level is removed without setting a budget.
+func ConvertThinkingLevelToBudget(body []byte, model string) []byte {
 	levelPath := "generationConfig.thinkingConfig.thinkingLevel"
 	res := gjson.GetBytes(body, levelPath)
 	if !res.Exists() {
 		return body
 	}

+	// For Gemini 3 models, preserve thinkingLevel - don't convert to budget
+	if IsGemini3Model(model) {
+		return body
+	}
+
 	level := strings.ToLower(res.String())
 	var budget int
 	switch level {
 	case "high":
 		budget = 32768
+	case "medium":
+		budget = 8192
 	case "low":
-		budget = 128
+		budget = 1024
+	case "minimal":
+		budget = 512
 	default:
-		// If unknown level, we might just leave it or default.
-		// User only specified high and low. We'll assume we shouldn't touch it if it's something else,
-		// or maybe we should just remove the invalid level?
-		// For safety adhering to strict instructions: "If high... if low...".
-		// If it's something else, the upstream might fail anyway if we leave it,
-		// but let's just delete the level if we processed it.
-		// Actually, let's check if we need to do anything for other values.
-		// For now, only handle high/low.
-		return body
+		// Unknown level - remove it and let the API use defaults
+		updated, _ := sjson.DeleteBytes(body, levelPath)
+		return updated
 	}

 	// Set budget
@@ -262,3 +528,50 @@ func ConvertThinkingLevelToBudget(body []byte) []byte {
 	}
 	return updated
 }
+
+// ConvertThinkingLevelToBudgetCLI checks for "request.generationConfig.thinkingConfig.thinkingLevel"
+// and converts it to "thinkingBudget" for every model that is not Gemini 3 (e.g. Gemini 2.5).
+// For Gemini 3 models, preserves thinkingLevel as-is; on other models an unknown level is removed without setting a budget.
+func ConvertThinkingLevelToBudgetCLI(body []byte, model string) []byte {
+	levelPath := "request.generationConfig.thinkingConfig.thinkingLevel"
+	res := gjson.GetBytes(body, levelPath)
+	if !res.Exists() {
+		return body
+	}
+
+	// For Gemini 3 models, preserve thinkingLevel - don't convert to budget
+	if IsGemini3Model(model) {
+		return body
+	}
+
+	level := strings.ToLower(res.String())
+	var budget int
+	switch level {
+	case "high":
+		budget = 32768
+	case "medium":
+		budget = 8192
+	case "low":
+		budget = 1024
+	case "minimal":
+		budget = 512
+	default:
+		// Unknown level - remove it and let the API use defaults
+		updated, _ := sjson.DeleteBytes(body, levelPath)
+		return updated
+	}
+
+	// Set budget
+	budgetPath := "request.generationConfig.thinkingConfig.thinkingBudget"
+	updated, err := sjson.SetBytes(body, budgetPath, budget)
+	if err != nil {
+		return body
+	}
+
+	// Remove level
+	updated, err = sjson.DeleteBytes(updated, levelPath)
+	if err != nil {
+		return body
+	}
+	return updated
+}
--- a/test/gemini3_thinking_level_test.go
+++ b/test/gemini3_thinking_level_test.go
@@ -0,0 +1,423 @@
+package test
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
+	"github.com/tidwall/gjson"
+)
+
+// registerGemini3Models loads Gemini 3 models into the registry for testing.
+func registerGemini3Models(t *testing.T) func() {
+	t.Helper()
+	reg := registry.GetGlobalRegistry()
+	uid := fmt.Sprintf("gemini3-test-%d", time.Now().UnixNano())
+	reg.RegisterClient(uid+"-gemini", "gemini", registry.GetGeminiModels())
+	reg.RegisterClient(uid+"-aistudio", "aistudio", registry.GetAIStudioModels())
+	return func() {
+		reg.UnregisterClient(uid + "-gemini")
+		reg.UnregisterClient(uid + "-aistudio")
+	}
+}
+
+func TestIsGemini3Model(t *testing.T) {
+	cases := []struct {
+		model    string
+		expected bool
+	}{
+		{"gemini-3-pro-preview", true},
+		{"gemini-3-flash-preview", true},
+		{"gemini_3_pro_preview", true},
+		{"gemini-3-pro", true},
+		{"gemini-3-flash", true},
+		{"GEMINI-3-PRO-PREVIEW", true},
+		{"gemini-2.5-pro", false},
+		{"gemini-2.5-flash", false},
+		{"gpt-5", false},
+		{"claude-sonnet-4-5", false},
+		{"", false},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.model, func(t *testing.T) {
+			got := util.IsGemini3Model(cs.model)
+			if got != cs.expected {
+				t.Fatalf("IsGemini3Model(%q) = %v, want %v", cs.model, got, cs.expected)
+			}
+		})
+	}
+}
+
+func TestIsGemini3ProModel(t *testing.T) {
+	cases := []struct {
+		model    string
+		expected bool
+	}{
+		{"gemini-3-pro-preview", true},
+		{"gemini_3_pro_preview", true},
+		{"gemini-3-pro", true},
+		{"GEMINI-3-PRO-PREVIEW", true},
+		{"gemini-3-flash-preview", false},
+		{"gemini-3-flash", false},
+		{"gemini-2.5-pro", false},
+		{"", false},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.model, func(t *testing.T) {
+			got := util.IsGemini3ProModel(cs.model)
+			if got != cs.expected {
+				t.Fatalf("IsGemini3ProModel(%q) = %v, want %v", cs.model, got, cs.expected)
+			}
+		})
+	}
+}
+
+func TestIsGemini3FlashModel(t *testing.T) {
+	cases := []struct {
+		model    string
+		expected bool
+	}{
+		{"gemini-3-flash-preview", true},
+		{"gemini_3_flash_preview", true},
+		{"gemini-3-flash", true},
+		{"GEMINI-3-FLASH-PREVIEW", true},
+		{"gemini-3-pro-preview", false},
+		{"gemini-3-pro", false},
+		{"gemini-2.5-flash", false},
+		{"", false},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.model, func(t *testing.T) {
+			got := util.IsGemini3FlashModel(cs.model)
+			if got != cs.expected {
+				t.Fatalf("IsGemini3FlashModel(%q) = %v, want %v", cs.model, got, cs.expected)
+			}
+		})
+	}
+}
+
+func TestValidateGemini3ThinkingLevel(t *testing.T) {
+	cases := []struct {
+		name    string
+		model   string
+		level   string
+		wantOK  bool
+		wantVal string
+	}{
+		// Gemini 3 Pro: supports "low", "high"
+		{"pro-low", "gemini-3-pro-preview", "low", true, "low"},
+		{"pro-high", "gemini-3-pro-preview", "high", true, "high"},
+		{"pro-minimal-invalid", "gemini-3-pro-preview", "minimal", false, ""},
+		{"pro-medium-invalid", "gemini-3-pro-preview", "medium", false, ""},
+
+		// Gemini 3 Flash: supports "minimal", "low", "medium", "high"
+		{"flash-minimal", "gemini-3-flash-preview", "minimal", true, "minimal"},
+		{"flash-low", "gemini-3-flash-preview", "low", true, "low"},
+		{"flash-medium", "gemini-3-flash-preview", "medium", true, "medium"},
+		{"flash-high", "gemini-3-flash-preview", "high", true, "high"},
+
+		// Case insensitivity
+		{"flash-LOW-case", "gemini-3-flash-preview", "LOW", true, "low"},
+		{"flash-High-case", "gemini-3-flash-preview", "High", true, "high"},
+		{"pro-HIGH-case", "gemini-3-pro-preview", "HIGH", true, "high"},
+
+		// Invalid levels
+		{"flash-invalid", "gemini-3-flash-preview", "xhigh", false, ""},
+		{"flash-invalid-auto", "gemini-3-flash-preview", "auto", false, ""},
+		{"flash-empty", "gemini-3-flash-preview", "", false, ""},
+
+		// Non-Gemini 3 models
+		{"non-gemini3", "gemini-2.5-pro", "high", false, ""},
+		{"gpt5", "gpt-5", "high", false, ""},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.name, func(t *testing.T) {
+			got, ok := util.ValidateGemini3ThinkingLevel(cs.model, cs.level)
+			if ok != cs.wantOK {
+				t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) ok = %v, want %v", cs.model, cs.level, ok, cs.wantOK)
+			}
+			if got != cs.wantVal {
+				t.Fatalf("ValidateGemini3ThinkingLevel(%q, %q) = %q, want %q", cs.model, cs.level, got, cs.wantVal)
+			}
+		})
+	}
+}
+
+func TestThinkingBudgetToGemini3Level(t *testing.T) {
+	cases := []struct {
+		name    string
+		model   string
+		budget  int
+		wantOK  bool
+		wantVal string
+	}{
+		// Gemini 3 Pro: maps to "low" or "high"
+		{"pro-dynamic", "gemini-3-pro-preview", -1, true, "high"},
+		{"pro-zero", "gemini-3-pro-preview", 0, true, "low"},
+		{"pro-small", "gemini-3-pro-preview", 1000, true, "low"},
+		{"pro-medium", "gemini-3-pro-preview", 8000, true, "low"},
+		{"pro-large", "gemini-3-pro-preview", 20000, true, "high"},
+		{"pro-huge", "gemini-3-pro-preview", 50000, true, "high"},
+
+		// Gemini 3 Flash: maps to "minimal", "low", "medium", "high"
+		{"flash-dynamic", "gemini-3-flash-preview", -1, true, "high"},
+		{"flash-zero", "gemini-3-flash-preview", 0, true, "minimal"},
+		{"flash-tiny", "gemini-3-flash-preview", 500, true, "minimal"},
+		{"flash-small", "gemini-3-flash-preview", 1000, true, "low"},
+		{"flash-medium-val", "gemini-3-flash-preview", 8000, true, "medium"},
+		{"flash-large", "gemini-3-flash-preview", 20000, true, "high"},
+		{"flash-huge", "gemini-3-flash-preview", 50000, true, "high"},
+
+		// Non-Gemini 3 models should return false
+		{"gemini25-budget", "gemini-2.5-pro", 8000, false, ""},
+		{"gpt5-budget", "gpt-5", 8000, false, ""},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.name, func(t *testing.T) {
+			got, ok := util.ThinkingBudgetToGemini3Level(cs.model, cs.budget)
+			if ok != cs.wantOK {
+				t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) ok = %v, want %v", cs.model, cs.budget, ok, cs.wantOK)
+			}
+			if got != cs.wantVal {
+				t.Fatalf("ThinkingBudgetToGemini3Level(%q, %d) = %q, want %q", cs.model, cs.budget, got, cs.wantVal)
+			}
+		})
+	}
+}
+
+func TestApplyGemini3ThinkingLevelFromMetadata(t *testing.T) {
+	cleanup := registerGemini3Models(t)
+	defer cleanup()
+
+	cases := []struct {
+		name         string
+		model        string
+		metadata     map[string]any
+		inputBody    string
+		wantLevel    string
+		wantInclude  bool
+		wantNoChange bool
+	}{
+		{
+			name:        "flash-minimal-from-suffix",
+			model:       "gemini-3-flash-preview",
+			metadata:    map[string]any{"reasoning_effort": "minimal"},
+			inputBody:   `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
+			wantLevel:   "minimal",
+			wantInclude: true,
+		},
+		{
+			name:        "flash-medium-from-suffix",
+			model:       "gemini-3-flash-preview",
+			metadata:    map[string]any{"reasoning_effort": "medium"},
+			inputBody:   `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
+			wantLevel:   "medium",
+			wantInclude: true,
+		},
+		{
+			name:        "pro-high-from-suffix",
+			model:       "gemini-3-pro-preview",
+			metadata:    map[string]any{"reasoning_effort": "high"},
+			inputBody:   `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
+			wantLevel:   "high",
+			wantInclude: true,
+		},
+		{
+			name:         "no-metadata-no-change",
+			model:        "gemini-3-flash-preview",
+			metadata:     nil,
+			inputBody:    `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
+			wantNoChange: true,
+		},
+		{
+			name:         "non-gemini3-no-change",
+			model:        "gemini-2.5-pro",
+			metadata:     map[string]any{"reasoning_effort": "high"},
+			inputBody:    `{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}`,
+			wantNoChange: true,
+		},
+		{
+			name:         "invalid-level-no-change",
+			model:        "gemini-3-flash-preview",
+			metadata:     map[string]any{"reasoning_effort": "xhigh"},
+			inputBody:    `{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}`,
+			wantNoChange: true,
+		},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.name, func(t *testing.T) {
+			input := []byte(cs.inputBody)
+			result := util.ApplyGemini3ThinkingLevelFromMetadata(cs.model, cs.metadata, input)
+
+			if cs.wantNoChange {
+				if string(result) != cs.inputBody {
+					t.Fatalf("expected no change, but got: %s", string(result))
+				}
+				return
+			}
+
+			level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel")
+			if !level.Exists() {
+				t.Fatalf("thinkingLevel not set in result: %s", string(result))
+			}
+			if level.String() != cs.wantLevel {
+				t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel)
+			}
+
+			include := gjson.GetBytes(result, "generationConfig.thinkingConfig.includeThoughts")
+			if cs.wantInclude && (!include.Exists() || !include.Bool()) {
+				t.Fatalf("includeThoughts should be true, got: %s", string(result))
+			}
+		})
+	}
+}
+
+func TestApplyGemini3ThinkingLevelFromMetadataCLI(t *testing.T) {
+	cleanup := registerGemini3Models(t)
+	defer cleanup()
+
+	cases := []struct {
+		name         string
+		model        string
+		metadata     map[string]any
+		inputBody    string
+		wantLevel    string
+		wantInclude  bool
+		wantNoChange bool
+	}{
+		{
+			name:        "flash-minimal-from-suffix-cli",
+			model:       "gemini-3-flash-preview",
+			metadata:    map[string]any{"reasoning_effort": "minimal"},
+			inputBody:   `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`,
+			wantLevel:   "minimal",
+			wantInclude: true,
+		},
+		{
+			name:        "flash-low-from-suffix-cli",
+			model:       "gemini-3-flash-preview",
+			metadata:    map[string]any{"reasoning_effort": "low"},
+			inputBody:   `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`,
+			wantLevel:   "low",
+			wantInclude: true,
+		},
+		{
+			name:        "pro-low-from-suffix-cli",
+			model:       "gemini-3-pro-preview",
+			metadata:    map[string]any{"reasoning_effort": "low"},
+			inputBody:   `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`,
+			wantLevel:   "low",
+			wantInclude: true,
+		},
+		{
+			name:         "no-metadata-no-change-cli",
+			model:        "gemini-3-flash-preview",
+			metadata:     nil,
+			inputBody:    `{"request":{"generationConfig":{"thinkingConfig":{"includeThoughts":true}}}}`,
+			wantNoChange: true,
+		},
+		{
+			name:         "non-gemini3-no-change-cli",
+			model:        "gemini-2.5-pro",
+			metadata:     map[string]any{"reasoning_effort": "high"},
+			inputBody:    `{"request":{"generationConfig":{"thinkingConfig":{"thinkingBudget":-1}}}}`,
+			wantNoChange: true,
+		},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.name, func(t *testing.T) {
+			input := []byte(cs.inputBody)
+			result := util.ApplyGemini3ThinkingLevelFromMetadataCLI(cs.model, cs.metadata, input)
+
+			if cs.wantNoChange {
+				if string(result) != cs.inputBody {
+					t.Fatalf("expected no change, but got: %s", string(result))
+				}
+				return
+			}
+
+			level := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.thinkingLevel")
+			if !level.Exists() {
+				t.Fatalf("thinkingLevel not set in result: %s", string(result))
+			}
+			if level.String() != cs.wantLevel {
+				t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel)
+			}
+
+			include := gjson.GetBytes(result, "request.generationConfig.thinkingConfig.includeThoughts")
+			if cs.wantInclude && (!include.Exists() || !include.Bool()) {
+				t.Fatalf("includeThoughts should be true, got: %s", string(result))
+			}
+		})
+	}
+}
+
+func TestNormalizeGeminiThinkingBudget_Gemini3Conversion(t *testing.T) {
+	cleanup := registerGemini3Models(t)
+	defer cleanup()
+
+	cases := []struct {
+		name       string
+		model      string
+		inputBody  string
+		wantLevel  string
+		wantBudget bool // if true, expect thinkingBudget instead of thinkingLevel
+	}{
+		{
+			name:      "gemini3-flash-budget-to-level",
+			model:     "gemini-3-flash-preview",
+			inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`,
+			wantLevel: "medium",
+		},
+		{
+			name:      "gemini3-pro-budget-to-level",
+			model:     "gemini-3-pro-preview",
+			inputBody: `{"generationConfig":{"thinkingConfig":{"thinkingBudget":20000}}}`,
+			wantLevel: "high",
+		},
+		{
+			name:       "gemini25-keeps-budget",
+			model:      "gemini-2.5-pro",
+			inputBody:  `{"generationConfig":{"thinkingConfig":{"thinkingBudget":8000}}}`,
+			wantBudget: true,
+		},
+	}
+
+	for _, cs := range cases {
+		t.Run(cs.name, func(t *testing.T) {
+			result := util.NormalizeGeminiThinkingBudget(cs.model, []byte(cs.inputBody))
+
+			if cs.wantBudget {
+				budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget")
+				if !budget.Exists() {
+					t.Fatalf("thinkingBudget should exist for non-Gemini3 model: %s", string(result))
+				}
+				level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel")
+				if level.Exists() {
+					t.Fatalf("thinkingLevel should not exist for non-Gemini3 model: %s", string(result))
+				}
+			} else {
+				level := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingLevel")
+				if !level.Exists() {
+					t.Fatalf("thinkingLevel should exist for Gemini3 model: %s", string(result))
+				}
+				if level.String() != cs.wantLevel {
+					t.Fatalf("thinkingLevel = %q, want %q", level.String(), cs.wantLevel)
+				}
+				budget := gjson.GetBytes(result, "generationConfig.thinkingConfig.thinkingBudget")
+				if budget.Exists() {
+					t.Fatalf("thinkingBudget should be removed for Gemini3 model: %s", string(result))
+				}
+			}
+		})
+	}
+}
Author	SHA1	Message	Date
Luis Pater	99478d13a8	Merge pull request #623 from router-for-me/remote-OAuth [CI: some checks failed — docker-image / docker (push) has been cancelled; goreleaser / goreleaser (push) has been cancelled] Remote OAuth	2025-12-19 18:29:09 +08:00
Luis Pater	69d3a80fc3	Merge pull request #618 from router-for-me/amp fix(amp): add management auth skipper	2025-12-19 17:37:51 +08:00
Luis Pater	9e268ad103	Merge pull request #619 from router-for-me/gemini fix(util): disable default thinking for gemini 3 flash	2025-12-19 17:36:52 +08:00
hkfires	9d9b9e7a0d	fix(amp): add management auth skipper	2025-12-19 13:57:47 +08:00
hkfires	13aa82f3f3	fix(util): disable default thinking for gemini 3 flash	2025-12-19 13:11:15 +08:00
Luis Pater	05e55d7dc5	feat(codex): update gpt-5.2 codex prompt instructions [CI: some checks failed — docker-image / docker (push) has been cancelled; goreleaser / goreleaser (push) has been cancelled] The prompt for the gpt-5.2 codex model has been updated with more comprehensive instructions. This includes detailed guidelines on general usage, editing constraints, the plan tool, sandboxing configurations, handling special user requests, frontend task considerations, and final message presentation. The updates aim to improve the model's understanding and execution of complex coding tasks by providing clearer directives and constraints.	2025-12-19 12:38:28 +08:00
Supra4E8C	1b358c931c	fix: restore get-auth-status ok fallback and document it	2025-12-19 12:15:22 +08:00
Luis Pater	ca09db21ff	feat(codex): add gpt-5.2 codex prompt handling [CI: some checks failed — docker-image / docker (push) has been cancelled; goreleaser / goreleaser (push) has been cancelled] This change introduces specific logic to load and use instructions for the 'gpt-5.2-codex' model variant by recognizing the 'gpt-5.2-codex_prompt.md' filename. This ensures the correct prompts are used when the '5.2-codex' model is identified, complementing the recent addition of its definition.	2025-12-19 11:39:51 +08:00
Chén Mù	718ff7a73f	Merge pull request #609 from router-for-me/codex [CI: some checks failed — docker-image / docker (push) has been cancelled; goreleaser / goreleaser (push) has been cancelled] feat(registry): add gpt 5.2 codex model definition	2025-12-19 09:54:34 +08:00
hkfires	fa70b220e9	feat(registry): add gpt 5.2 codex model definition	2025-12-19 09:53:03 +08:00
Luis Pater	774f1fbc17	Merge pull request #586 from router-for-me/chore chore: ignore gemini metadata files	2025-12-19 01:00:30 +08:00
Supra4E8C	cfa8ddb59f	feat(oauth): add remote OAuth callback support with session management Introduce a centralized OAuth session store with TTL-based expiration to replace the previous simple map-based status tracking. Add a new /api/oauth/callback endpoint that allows remote clients to relay OAuth callback data back to the CLI proxy, enabling OAuth flows when the callback cannot reach the local machine directly. - Add oauth_sessions.go with thread-safe session store and validation - Add oauth_callback.go with POST handler for remote callback relay - Refactor auth_files.go to use new session management APIs - Register new callback route in server.go	2025-12-19 00:38:29 +08:00
hkfires	393e38f2c0	chore: ignore gemini metadata files	2025-12-18 13:18:15 +08:00
Luis Pater	d1220de02d	chore(docs): remove legacy documentation and unused PR workflow file [CI: some checks failed — docker-image / docker (push) has been cancelled; goreleaser / goreleaser (push) has been cancelled]	2025-12-18 08:21:58 +08:00
Luis Pater	13eb5268de	Merge pull request #582 from ben-vargas/fix-gemini-3-thinking-level feat: use thinkingLevel for Gemini 3 models per Google documentation	2025-12-18 07:19:37 +08:00
Ben Vargas	88798816f2	fix: require dot in gemini25Pattern regex for precise matching	2025-12-17 16:09:50 -07:00
Ben Vargas	598f0af19b	fix: apply thinkingLevel from model suffix metadata for Gemini 3 The previous commit added thinkingLevel support but didn't apply it when the reasoning effort came from model name suffix (e.g., model(minimal)). This was because ResolveThinkingConfigFromMetadata returns nil for level-based models, bypassing the metadata application. Changes: - Add ApplyGemini3ThinkingLevelFromMetadata for standard Gemini API - Add ApplyGemini3ThinkingLevelFromMetadataCLI for CLI API format - Update gemini_cli_executor to apply Gemini 3 thinkingLevel from metadata - Update antigravity_executor to apply Gemini 3 thinkingLevel from metadata - Update aistudio_executor to apply Gemini 3 thinkingLevel from metadata - Add comprehensive test coverage for Gemini 3 thinkingLevel functions	2025-12-17 16:08:38 -07:00
Ben Vargas	a33f5d31fc	feat: use thinkingLevel for Gemini 3 models per Google documentation Per Google's official documentation, Gemini 3 models should use thinkingLevel (string) instead of thinkingBudget (number) for optimal performance. From Google's Gemini Thinking docs: > Use the thinkingLevel parameter with Gemini 3 models. While > thinkingBudget is accepted for backwards compatibility, using > it with Gemini 3 Pro may result in suboptimal performance. Changes: - Add model family detection functions (IsGemini3Model, IsGemini25Model, IsGemini3ProModel, IsGemini3FlashModel) - Add ApplyGeminiThinkingLevel and ApplyGeminiCLIThinkingLevel functions for applying thinkingLevel config - Add ValidateGemini3ThinkingLevel for model-specific level validation - Add ThinkingBudgetToGemini3Level for backward compatibility conversion - Update NormalizeGeminiThinkingBudget to convert budget to level for Gemini 3 models - Update ApplyDefaultThinkingIfNeeded to not set a default level for Gemini 3 (lets API use its dynamic default "high") - Update ConvertThinkingLevelToBudget to preserve thinkingLevel for Gemini 3 models - Add Levels field to all Gemini 3 model definitions: - Gemini 3 Pro: ["low", "high"] - Gemini 3 Flash: ["minimal", "low", "medium", "high"] Backward compatibility: - Gemini 2.5 models continue to use thinkingBudget as before - If thinkingBudget is provided for Gemini 3, it's converted to the appropriate thinkingLevel - Existing configurations continue to work	2025-12-17 15:28:20 -07:00
Luis Pater	506699fba1	ci(workflows): update pr-test-build workflow	2025-12-18 03:28:23 +08:00
Luis Pater	68a27772b3	feat(antigravity): enable token counting via API with resilient routing [CI: some checks failed — docker-image / docker (push) has been cancelled; goreleaser / goreleaser (push) has been cancelled] Introduces the capability to count tokens for Antigravity-backed requests. This implementation leverages the `countTokens` endpoint of the Antigravity API, replacing the prior unsupported stub. Key aspects of this update include: - API Integration: Direct integration with the Antigravity `countTokens` API, including necessary request payload translation and authentication. - Resilient Infrastructure: A fallback mechanism has been established, allowing the system to attempt connections across multiple Antigravity base URLs to ensure request success even in the event of temporary service interruptions. - Model Aliasing: Added mappings for `gemini-3-flash` and `gemini-3-flash-preview` to ensure compatibility with the latest model variants. - Robust Error Handling: Comprehensive error handling and logging are in place to manage failures during API interactions.	2025-12-18 03:12:46 +08:00
Ben Vargas	de87fb622b	docs: add redirect info and disable Pull app auto-sync	2025-12-17 12:06:39 -07:00
Luis Pater	f27672f6cf	feat(antigravity): add Gemini 3 Flash Preview model definition with enhanced capabilities [CI: some checks failed — docker-image / docker (push) has been cancelled; goreleaser / goreleaser (push) has been cancelled]	2025-12-18 01:02:19 +08:00
Luis Pater	28420c14e4	Merge pull request #580 from router-for-me/chore chore: ignore agent and bmad artifacts	2025-12-18 00:46:25 +08:00
hkfires	9b956f6338	chore: ignore agent and bmad artifacts	2025-12-17 23:15:15 +08:00