**fix(translator): handle non-JSON output parsing for OpenAI function responses**

- Updated `antigravity_openai_request.go` to process non-JSON outputs gracefully by verifying and distinguishing between JSON and plain string formats.
- Ensured proper assignment of parsed or raw response to `functionResponse`.

**feat(registry): add context length and update max tokens for Claude model configurations**
2025-11-27 16:18:49 +08:00 · 2025-11-27 16:13:25 +08:00 · 2025-11-27 16:03:24 +08:00 · 2025-11-27 15:59:15 +08:00 · 2025-11-26 22:31:05 -08:00 · 2025-11-27 10:25:45 +08:00
32 changed files with 1219 additions and 227 deletions
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -1,6 +1,12 @@
 # Server port
 port: 8317

+# TLS settings for HTTPS. When enabled, the server listens with the provided certificate and key.
+tls:
+  enable: false
+  cert: ""
+  key: ""
+
 # Management API settings
 remote-management:
  # Whether to allow remote (non-localhost) management access.
@@ -38,6 +44,9 @@ proxy-url: ""
 # Number of times to retry a request. Retries will occur if the HTTP response code is 403, 408, 500, 502, 503, or 504.
 request-retry: 3

+# Maximum wait time in seconds for a cooled-down credential before triggering a retry.
+max-retry-interval: 30
+
 # Quota exceeded behavior
 quota-exceeded:
  switch-project: true # Whether to automatically switch to another project when a quota is exceeded
--- a/internal/api/handlers/management/auth_files.go
+++ b/internal/api/handlers/management/auth_files.go
@@ -235,7 +235,11 @@ func (h *Handler) managementCallbackURL(path string) (string, error) {
 	if !strings.HasPrefix(path, "/") {
 		path = "/" + path
 	}
-	return fmt.Sprintf("http://127.0.0.1:%d%s", h.cfg.Port, path), nil
+	scheme := "http"
+	if h.cfg.TLS.Enable {
+		scheme = "https"
+	}
+	return fmt.Sprintf("%s://127.0.0.1:%d%s", scheme, h.cfg.Port, path), nil
 }

 func (h *Handler) ListAuthFiles(c *gin.Context) {
--- a/internal/api/handlers/management/config_basic.go
+++ b/internal/api/handlers/management/config_basic.go
@@ -172,6 +172,14 @@ func (h *Handler) PutRequestRetry(c *gin.Context) {
 	h.updateIntField(c, func(v int) { h.cfg.RequestRetry = v })
 }

+// Max retry interval
+func (h *Handler) GetMaxRetryInterval(c *gin.Context) {
+	c.JSON(200, gin.H{"max-retry-interval": h.cfg.MaxRetryInterval})
+}
+func (h *Handler) PutMaxRetryInterval(c *gin.Context) {
+	h.updateIntField(c, func(v int) { h.cfg.MaxRetryInterval = v })
+}
+
 // Proxy URL
 func (h *Handler) GetProxyURL(c *gin.Context) { c.JSON(200, gin.H{"proxy-url": h.cfg.ProxyURL}) }
 func (h *Handler) PutProxyURL(c *gin.Context) {
--- a/internal/api/handlers/management/logs.go
+++ b/internal/api/handlers/management/logs.go
@@ -58,8 +58,14 @@ func (h *Handler) GetLogs(c *gin.Context) {
 		return
 	}

+	limit, errLimit := parseLimit(c.Query("limit"))
+	if errLimit != nil {
+		c.JSON(http.StatusBadRequest, gin.H{"error": fmt.Sprintf("invalid limit: %v", errLimit)})
+		return
+	}
+
 	cutoff := parseCutoff(c.Query("after"))
-	acc := newLogAccumulator(cutoff)
+	acc := newLogAccumulator(cutoff, limit)
 	for i := range files {
 		if errProcess := acc.consumeFile(files[i]); errProcess != nil {
 			c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to read log file %s: %v", files[i], errProcess)})
@@ -139,6 +145,126 @@ func (h *Handler) DeleteLogs(c *gin.Context) {
 	})
 }

+// GetRequestErrorLogs lists the error-*.log files in the log directory, newest first, when RequestLog is disabled.
+// It returns an empty list when RequestLog is enabled or when the log directory does not exist.
+func (h *Handler) GetRequestErrorLogs(c *gin.Context) {
+	if h == nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "handler unavailable"})
+		return
+	}
+	if h.cfg == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "configuration unavailable"})
+		return
+	}
+	if h.cfg.RequestLog {
+		c.JSON(http.StatusOK, gin.H{"files": []any{}}) // non-nil slice so the JSON body carries [] rather than null
+		return
+	}
+
+	dir := h.logDirectory()
+	if strings.TrimSpace(dir) == "" {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "log directory not configured"})
+		return
+	}
+
+	entries, err := os.ReadDir(dir)
+	if err != nil {
+		if os.IsNotExist(err) { // a missing log directory is reported as an empty list, not as an error
+			c.JSON(http.StatusOK, gin.H{"files": []any{}})
+			return
+		}
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to list request error logs: %v", err)})
+		return
+	}
+
+	type errorLog struct {
+		Name     string `json:"name"`
+		Size     int64  `json:"size"`     // size in bytes, from FileInfo.Size
+		Modified int64  `json:"modified"` // modification time as Unix seconds
+	}
+
+	files := make([]errorLog, 0, len(entries))
+	for _, entry := range entries {
+		if entry.IsDir() {
+			continue
+		}
+		name := entry.Name()
+		if !strings.HasPrefix(name, "error-") || !strings.HasSuffix(name, ".log") { // keep only files named error-*.log
+			continue
+		}
+		info, errInfo := entry.Info()
+		if errInfo != nil { // a single unreadable entry fails the whole listing
+			c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to read log info for %s: %v", name, errInfo)})
+			return
+		}
+		files = append(files, errorLog{
+			Name:     name,
+			Size:     info.Size(),
+			Modified: info.ModTime().Unix(),
+		})
+	}
+
+	sort.Slice(files, func(i, j int) bool { return files[i].Modified > files[j].Modified }) // newest first
+
+	c.JSON(http.StatusOK, gin.H{"files": files})
+}
+
+// DownloadRequestErrorLog sends the error-*.log file named by the "name" path parameter as an attachment; the name must not contain path separators.
+func (h *Handler) DownloadRequestErrorLog(c *gin.Context) {
+	if h == nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "handler unavailable"})
+		return
+	}
+	if h.cfg == nil {
+		c.JSON(http.StatusServiceUnavailable, gin.H{"error": "configuration unavailable"})
+		return
+	}
+
+	dir := h.logDirectory()
+	if strings.TrimSpace(dir) == "" {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": "log directory not configured"})
+		return
+	}
+
+	name := strings.TrimSpace(c.Param("name"))
+	if name == "" || strings.Contains(name, "/") || strings.Contains(name, "\\") { // reject empty names and names containing either path separator
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid log file name"})
+		return
+	}
+	if !strings.HasPrefix(name, "error-") || !strings.HasSuffix(name, ".log") { // only error-*.log names are served; anything else is reported as not found
+		c.JSON(http.StatusNotFound, gin.H{"error": "log file not found"})
+		return
+	}
+
+	dirAbs, errAbs := filepath.Abs(dir)
+	if errAbs != nil {
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to resolve log directory: %v", errAbs)})
+		return
+	}
+	fullPath := filepath.Clean(filepath.Join(dirAbs, name))
+	prefix := dirAbs + string(os.PathSeparator) // trailing separator keeps a sibling directory sharing the same name prefix from matching
+	if !strings.HasPrefix(fullPath, prefix) { // second guard: the cleaned path must still sit inside the log directory
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid log file path"})
+		return
+	}
+
+	info, errStat := os.Stat(fullPath)
+	if errStat != nil {
+		if os.IsNotExist(errStat) {
+			c.JSON(http.StatusNotFound, gin.H{"error": "log file not found"})
+			return
+		}
+		c.JSON(http.StatusInternalServerError, gin.H{"error": fmt.Sprintf("failed to read log file: %v", errStat)})
+		return
+	}
+	if info.IsDir() { // a directory that happens to match the error-*.log pattern is not downloadable
+		c.JSON(http.StatusBadRequest, gin.H{"error": "invalid log file"})
+		return
+	}
+
+	c.FileAttachment(fullPath, name) // name is passed to gin as the attachment filename
+}
+
 func (h *Handler) logDirectory() string {
 	if h == nil {
 		return ""
@@ -194,16 +320,22 @@ func (h *Handler) collectLogFiles(dir string) ([]string, error) {

 type logAccumulator struct {
 	cutoff  int64
+	limit   int
 	lines   []string
 	total   int
 	latest  int64
 	include bool
 }

-func newLogAccumulator(cutoff int64) *logAccumulator {
+func newLogAccumulator(cutoff int64, limit int) *logAccumulator {
+	capacity := 256
+	if limit > 0 && limit < capacity {
+		capacity = limit
+	}
 	return &logAccumulator{
 		cutoff: cutoff,
-		lines:  make([]string, 0, 256),
+		limit:  limit,
+		lines:  make([]string, 0, capacity),
 	}
 }

@@ -215,7 +347,9 @@ func (acc *logAccumulator) consumeFile(path string) error {
 		}
 		return err
 	}
-	defer file.Close()
+	defer func() {
+		_ = file.Close()
+	}()

 	scanner := bufio.NewScanner(file)
 	buf := make([]byte, 0, logScannerInitialBuffer)
@@ -239,12 +373,19 @@ func (acc *logAccumulator) addLine(raw string) {
 	if ts > 0 {
 		acc.include = acc.cutoff == 0 || ts > acc.cutoff
 		if acc.cutoff == 0 || acc.include {
-			acc.lines = append(acc.lines, line)
+			acc.append(line)
 		}
 		return
 	}
 	if acc.cutoff == 0 || acc.include {
-		acc.lines = append(acc.lines, line)
+		acc.append(line)
+	}
+}
+
+func (acc *logAccumulator) append(line string) {
+	acc.lines = append(acc.lines, line)
+	if acc.limit > 0 && len(acc.lines) > acc.limit {
+		acc.lines = acc.lines[len(acc.lines)-acc.limit:]
 	}
 }

@@ -267,6 +408,21 @@ func parseCutoff(raw string) int64 {
 	return ts
 }

+func parseLimit(raw string) (int, error) {
+	value := strings.TrimSpace(raw)
+	if value == "" {
+		return 0, nil
+	}
+	limit, err := strconv.Atoi(value)
+	if err != nil {
+		return 0, fmt.Errorf("must be a positive integer")
+	}
+	if limit <= 0 {
+		return 0, fmt.Errorf("must be greater than zero")
+	}
+	return limit, nil
+}
+
 func parseTimestamp(line string) int64 {
 	if strings.HasPrefix(line, "[") {
 		line = line[1:]
--- a/internal/api/middleware/request_logging.go
+++ b/internal/api/middleware/request_logging.go
@@ -6,6 +6,7 @@ package middleware
 import (
 	"bytes"
 	"io"
+	"net/http"
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -15,8 +16,8 @@ import (

 // RequestLoggingMiddleware creates a Gin middleware that logs HTTP requests and responses.
 // It captures detailed information about the request and response, including headers and body,
-// and uses the provided RequestLogger to record this data. If logging is disabled in the
-// logger, the middleware has minimal overhead.
+// and uses the provided RequestLogger to record this data. When logging is disabled in the
+// logger, it still captures data so that upstream errors can be persisted.
 func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {
 	return func(c *gin.Context) {
 		if logger == nil {
@@ -24,14 +25,13 @@ func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {
 			return
 		}

-		path := c.Request.URL.Path
-		if !shouldLogRequest(path) {
+		if c.Request.Method == http.MethodGet {
 			c.Next()
 			return
 		}

-		// Early return if logging is disabled (zero overhead)
-		if !logger.IsEnabled() {
+		path := c.Request.URL.Path
+		if !shouldLogRequest(path) {
 			c.Next()
 			return
 		}
@@ -47,6 +47,9 @@ func RequestLoggingMiddleware(logger logging.RequestLogger) gin.HandlerFunc {

 		// Create response writer wrapper
 		wrapper := NewResponseWriterWrapper(c.Writer, logger, requestInfo)
+		if !logger.IsEnabled() {
+			wrapper.logOnErrorOnly = true
+		}
 		c.Writer = wrapper

 		// Process the request
--- a/internal/api/middleware/response_writer.go
+++ b/internal/api/middleware/response_writer.go
@@ -5,6 +5,7 @@ package middleware

 import (
 	"bytes"
+	"net/http"
 	"strings"

 	"github.com/gin-gonic/gin"
@@ -24,15 +25,16 @@ type RequestInfo struct {
 // It is designed to handle both standard and streaming responses, ensuring that logging operations do not block the client response.
 type ResponseWriterWrapper struct {
 	gin.ResponseWriter
-	body         *bytes.Buffer              // body is a buffer to store the response body for non-streaming responses.
-	isStreaming  bool                       // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
-	streamWriter logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
-	chunkChannel chan []byte                // chunkChannel is a channel for asynchronously passing response chunks to the logger.
-	streamDone   chan struct{}              // streamDone signals when the streaming goroutine completes.
-	logger       logging.RequestLogger      // logger is the instance of the request logger service.
-	requestInfo  *RequestInfo               // requestInfo holds the details of the original request.
-	statusCode   int                        // statusCode stores the HTTP status code of the response.
-	headers      map[string][]string        // headers stores the response headers.
+	body           *bytes.Buffer              // body is a buffer to store the response body for non-streaming responses.
+	isStreaming    bool                       // isStreaming indicates whether the response is a streaming type (e.g., text/event-stream).
+	streamWriter   logging.StreamingLogWriter // streamWriter is a writer for handling streaming log entries.
+	chunkChannel   chan []byte                // chunkChannel is a channel for asynchronously passing response chunks to the logger.
+	streamDone     chan struct{}              // streamDone signals when the streaming goroutine completes.
+	logger         logging.RequestLogger      // logger is the instance of the request logger service.
+	requestInfo    *RequestInfo               // requestInfo holds the details of the original request.
+	statusCode     int                        // statusCode stores the HTTP status code of the response.
+	headers        map[string][]string        // headers stores the response headers.
+	logOnErrorOnly bool                       // logOnErrorOnly enables logging only when an error response is detected.
 }

 // NewResponseWriterWrapper creates and initializes a new ResponseWriterWrapper.
@@ -192,12 +194,34 @@ func (w *ResponseWriterWrapper) processStreamingChunks(done chan struct{}) {
 // For non-streaming responses, it logs the complete request and response details,
 // including any API-specific request/response data stored in the Gin context.
 func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
-	if !w.logger.IsEnabled() {
+	if w.logger == nil {
+		return nil
+	}
+
+	finalStatusCode := w.statusCode
+	if finalStatusCode == 0 {
+		if statusWriter, ok := w.ResponseWriter.(interface{ Status() int }); ok {
+			finalStatusCode = statusWriter.Status()
+		} else {
+			finalStatusCode = 200
+		}
+	}
+
+	var slicesAPIResponseError []*interfaces.ErrorMessage
+	apiResponseError, isExist := c.Get("API_RESPONSE_ERROR")
+	if isExist {
+		if apiErrors, ok := apiResponseError.([]*interfaces.ErrorMessage); ok {
+			slicesAPIResponseError = apiErrors
+		}
+	}
+
+	hasAPIError := len(slicesAPIResponseError) > 0 || finalStatusCode >= http.StatusBadRequest
+	forceLog := w.logOnErrorOnly && hasAPIError && !w.logger.IsEnabled()
+	if !w.logger.IsEnabled() && !forceLog {
 		return nil
 	}

 	if w.isStreaming {
-		// Close streaming channel and writer
 		if w.chunkChannel != nil {
 			close(w.chunkChannel)
 			w.chunkChannel = nil
@@ -209,80 +233,98 @@ func (w *ResponseWriterWrapper) Finalize(c *gin.Context) error {
 		}

 		if w.streamWriter != nil {
-			err := w.streamWriter.Close()
+			if err := w.streamWriter.Close(); err != nil {
+				w.streamWriter = nil
+				return err
+			}
 			w.streamWriter = nil
-			return err
 		}
-	} else {
-		// Capture final status code and headers if not already captured
-		finalStatusCode := w.statusCode
-		if finalStatusCode == 0 {
-			// Get status from underlying ResponseWriter if available
-			if statusWriter, ok := w.ResponseWriter.(interface{ Status() int }); ok {
-				finalStatusCode = statusWriter.Status()
-			} else {
-				finalStatusCode = 200 // Default
-			}
+		if forceLog {
+			return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), slicesAPIResponseError, forceLog)
 		}
+		return nil
+	}

-		// Ensure we have the latest headers before finalizing
-		w.ensureHeadersCaptured()
+	return w.logRequest(finalStatusCode, w.cloneHeaders(), w.body.Bytes(), w.extractAPIRequest(c), w.extractAPIResponse(c), slicesAPIResponseError, forceLog)
+}

-		// Use the captured headers as the final headers
-		finalHeaders := make(map[string][]string)
-		for key, values := range w.headers {
-			// Make a copy of the values slice to avoid reference issues
-			headerValues := make([]string, len(values))
-			copy(headerValues, values)
-			finalHeaders[key] = headerValues
-		}
+func (w *ResponseWriterWrapper) cloneHeaders() map[string][]string {
+	w.ensureHeadersCaptured()

-		var apiRequestBody []byte
-		apiRequest, isExist := c.Get("API_REQUEST")
-		if isExist {
-			var ok bool
-			apiRequestBody, ok = apiRequest.([]byte)
-			if !ok {
-				apiRequestBody = nil
-			}
-		}
+	finalHeaders := make(map[string][]string, len(w.headers))
+	for key, values := range w.headers {
+		headerValues := make([]string, len(values))
+		copy(headerValues, values)
+		finalHeaders[key] = headerValues
+	}

-		var apiResponseBody []byte
-		apiResponse, isExist := c.Get("API_RESPONSE")
-		if isExist {
-			var ok bool
-			apiResponseBody, ok = apiResponse.([]byte)
-			if !ok {
-				apiResponseBody = nil
-			}
-		}
+	return finalHeaders
+}

-		var slicesAPIResponseError []*interfaces.ErrorMessage
-		apiResponseError, isExist := c.Get("API_RESPONSE_ERROR")
-		if isExist {
-			var ok bool
-			slicesAPIResponseError, ok = apiResponseError.([]*interfaces.ErrorMessage)
-			if !ok {
-				slicesAPIResponseError = nil
-			}
-		}
+func (w *ResponseWriterWrapper) extractAPIRequest(c *gin.Context) []byte {
+	apiRequest, isExist := c.Get("API_REQUEST")
+	if !isExist {
+		return nil
+	}
+	data, ok := apiRequest.([]byte)
+	if !ok || len(data) == 0 {
+		return nil
+	}
+	return data
+}

-		// Log complete non-streaming response
-		return w.logger.LogRequest(
+func (w *ResponseWriterWrapper) extractAPIResponse(c *gin.Context) []byte {
+	apiResponse, isExist := c.Get("API_RESPONSE")
+	if !isExist {
+		return nil
+	}
+	data, ok := apiResponse.([]byte)
+	if !ok || len(data) == 0 {
+		return nil
+	}
+	return data
+}
+
+func (w *ResponseWriterWrapper) logRequest(statusCode int, headers map[string][]string, body []byte, apiRequestBody, apiResponseBody []byte, apiResponseErrors []*interfaces.ErrorMessage, forceLog bool) error {
+	if w.requestInfo == nil {
+		return nil
+	}
+
+	var requestBody []byte
+	if len(w.requestInfo.Body) > 0 {
+		requestBody = w.requestInfo.Body
+	}
+
+	if loggerWithOptions, ok := w.logger.(interface {
+		LogRequestWithOptions(string, string, map[string][]string, []byte, int, map[string][]string, []byte, []byte, []byte, []*interfaces.ErrorMessage, bool) error
+	}); ok {
+		return loggerWithOptions.LogRequestWithOptions(
 			w.requestInfo.URL,
 			w.requestInfo.Method,
 			w.requestInfo.Headers,
-			w.requestInfo.Body,
-			finalStatusCode,
-			finalHeaders,
-			w.body.Bytes(),
+			requestBody,
+			statusCode,
+			headers,
+			body,
 			apiRequestBody,
 			apiResponseBody,
-			slicesAPIResponseError,
+			apiResponseErrors,
+			forceLog,
 		)
 	}

-	return nil
+	return w.logger.LogRequest(
+		w.requestInfo.URL,
+		w.requestInfo.Method,
+		w.requestInfo.Headers,
+		requestBody,
+		statusCode,
+		headers,
+		body,
+		apiRequestBody,
+		apiResponseBody,
+		apiResponseErrors,
+	)
 }

 // Status returns the HTTP response status code captured by the wrapper.
--- a/internal/api/modules/amp/amp.go
+++ b/internal/api/modules/amp/amp.go
@@ -181,5 +181,3 @@ func (m *AmpModule) OnConfigUpdated(cfg *config.Config) error {
 	log.Debug("Amp config updated (restart required for URL changes)")
 	return nil
 }
-
-
--- a/internal/api/modules/amp/proxy.go
+++ b/internal/api/modules/amp/proxy.go
@@ -83,7 +83,7 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 		// Peek at first 2 bytes to detect gzip magic bytes
 		header := make([]byte, 2)
 		n, _ := io.ReadFull(originalBody, header)
-		
+
 		// Check for gzip magic bytes (0x1f 0x8b)
 		// If n < 2, we didn't get enough bytes, so it's not gzip
 		if n >= 2 && header[0] == 0x1f && header[1] == 0x8b {
@@ -97,7 +97,7 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 				}
 				return nil
 			}
-			
+
 			// Reconstruct complete gzipped data
 			gzippedData := append(header[:n], rest...)

@@ -129,8 +129,8 @@ func createReverseProxy(upstreamURL string, secretSource SecretSource) (*httputi
 			resp.ContentLength = int64(len(decompressed))

 			// Update headers to reflect decompressed state
-			resp.Header.Del("Content-Encoding")                                      // No longer compressed
-			resp.Header.Del("Content-Length")                                        // Remove stale compressed length
+			resp.Header.Del("Content-Encoding")                                          // No longer compressed
+			resp.Header.Del("Content-Length")                                            // Remove stale compressed length
 			resp.Header.Set("Content-Length", strconv.FormatInt(resp.ContentLength, 10)) // Set decompressed length

 			log.Debugf("amp proxy: decompressed gzip response (%d -> %d bytes)", len(gzippedData), len(decompressed))
--- a/internal/api/modules/amp/proxy_test.go
+++ b/internal/api/modules/amp/proxy_test.go
@@ -440,52 +440,52 @@ func TestIsStreamingResponse(t *testing.T) {

 func TestFilterBetaFeatures(t *testing.T) {
 	tests := []struct {
-		name           string
-		header         string
+		name            string
+		header          string
 		featureToRemove string
-		expected       string
+		expected        string
 	}{
 		{
-			name:           "Remove context-1m from middle",
-			header:         "fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07,oauth-2025-04-20",
+			name:            "Remove context-1m from middle",
+			header:          "fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07,oauth-2025-04-20",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
+			expected:        "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
 		},
 		{
-			name:           "Remove context-1m from start",
-			header:         "context-1m-2025-08-07,fine-grained-tool-streaming-2025-05-14",
+			name:            "Remove context-1m from start",
+			header:          "context-1m-2025-08-07,fine-grained-tool-streaming-2025-05-14",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14",
+			expected:        "fine-grained-tool-streaming-2025-05-14",
 		},
 		{
-			name:           "Remove context-1m from end",
-			header:         "fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07",
+			name:            "Remove context-1m from end",
+			header:          "fine-grained-tool-streaming-2025-05-14,context-1m-2025-08-07",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14",
+			expected:        "fine-grained-tool-streaming-2025-05-14",
 		},
 		{
-			name:           "Feature not present",
-			header:         "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
+			name:            "Feature not present",
+			header:          "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
+			expected:        "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
 		},
 		{
-			name:           "Only feature to remove",
-			header:         "context-1m-2025-08-07",
+			name:            "Only feature to remove",
+			header:          "context-1m-2025-08-07",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "",
+			expected:        "",
 		},
 		{
-			name:           "Empty header",
-			header:         "",
+			name:            "Empty header",
+			header:          "",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "",
+			expected:        "",
 		},
 		{
-			name:           "Header with spaces",
-			header:         "fine-grained-tool-streaming-2025-05-14, context-1m-2025-08-07 , oauth-2025-04-20",
+			name:            "Header with spaces",
+			header:          "fine-grained-tool-streaming-2025-05-14, context-1m-2025-08-07 , oauth-2025-04-20",
 			featureToRemove: "context-1m-2025-08-07",
-			expected:       "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
+			expected:        "fine-grained-tool-streaming-2025-05-14,oauth-2025-04-20",
 		},
 	}

--- a/internal/api/modules/amp/routes.go
+++ b/internal/api/modules/amp/routes.go
@@ -6,11 +6,11 @@ import (
 	"strings"

 	"github.com/gin-gonic/gin"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/gemini"
 	"github.com/router-for-me/CLIProxyAPI/v6/sdk/api/handlers/openai"
-	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	log "github.com/sirupsen/logrus"
 )

--- a/internal/api/server.go
+++ b/internal/api/server.go
@@ -247,6 +247,9 @@ func NewServer(cfg *config.Config, authManager *auth.Manager, accessManager *sdk
 	// Save initial YAML snapshot
 	s.oldConfigYaml, _ = yaml.Marshal(cfg)
 	s.applyAccessConfig(nil, cfg)
+	if authManager != nil {
+		authManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second)
+	}
 	managementasset.SetCurrentConfig(cfg)
 	auth.SetQuotaCooldownDisabled(cfg.DisableCooling)
 	// Initialize management handler
@@ -509,6 +512,8 @@ func (s *Server) registerManagementRoutes() {

 		mgmt.GET("/logs", s.mgmt.GetLogs)
 		mgmt.DELETE("/logs", s.mgmt.DeleteLogs)
+		mgmt.GET("/request-error-logs", s.mgmt.GetRequestErrorLogs)
+		mgmt.GET("/request-error-logs/:name", s.mgmt.DownloadRequestErrorLog)
 		mgmt.GET("/request-log", s.mgmt.GetRequestLog)
 		mgmt.PUT("/request-log", s.mgmt.PutRequestLog)
 		mgmt.PATCH("/request-log", s.mgmt.PutRequestLog)
@@ -519,6 +524,9 @@ func (s *Server) registerManagementRoutes() {
 		mgmt.GET("/request-retry", s.mgmt.GetRequestRetry)
 		mgmt.PUT("/request-retry", s.mgmt.PutRequestRetry)
 		mgmt.PATCH("/request-retry", s.mgmt.PutRequestRetry)
+		mgmt.GET("/max-retry-interval", s.mgmt.GetMaxRetryInterval)
+		mgmt.PUT("/max-retry-interval", s.mgmt.PutMaxRetryInterval)
+		mgmt.PATCH("/max-retry-interval", s.mgmt.PutMaxRetryInterval)

 		mgmt.GET("/claude-api-key", s.mgmt.GetClaudeKeys)
 		mgmt.PUT("/claude-api-key", s.mgmt.PutClaudeKeys)
@@ -686,17 +694,33 @@ func (s *Server) unifiedModelsHandler(openaiHandler *openai.OpenAIAPIHandler, cl
 	}
 }

-// Start begins listening for and serving HTTP requests.
+// Start begins listening for and serving HTTP or HTTPS requests.
 // It's a blocking call and will only return on an unrecoverable error.
 //
 // Returns:
 //   - error: An error if the server fails to start
 func (s *Server) Start() error {
-	log.Debugf("Starting API server on %s", s.server.Addr)
+	if s == nil || s.server == nil {
+		return fmt.Errorf("failed to start HTTP server: server not initialized")
+	}

-	// Start the HTTP server.
-	if err := s.server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
-		return fmt.Errorf("failed to start HTTP server: %v", err)
+	useTLS := s.cfg != nil && s.cfg.TLS.Enable
+	if useTLS {
+		cert := strings.TrimSpace(s.cfg.TLS.Cert)
+		key := strings.TrimSpace(s.cfg.TLS.Key)
+		if cert == "" || key == "" {
+			return fmt.Errorf("failed to start HTTPS server: tls.cert or tls.key is empty")
+		}
+		log.Debugf("Starting API server on %s with TLS", s.server.Addr)
+		if errServeTLS := s.server.ListenAndServeTLS(cert, key); errServeTLS != nil && !errors.Is(errServeTLS, http.ErrServerClosed) {
+			return fmt.Errorf("failed to start HTTPS server: %v", errServeTLS)
+		}
+		return nil
+	}
+
+	log.Debugf("Starting API server on %s", s.server.Addr)
+	if errServe := s.server.ListenAndServe(); errServe != nil && !errors.Is(errServe, http.ErrServerClosed) {
+		return fmt.Errorf("failed to start HTTP server: %v", errServe)
 	}

 	return nil
@@ -814,6 +838,9 @@ func (s *Server) UpdateClients(cfg *config.Config) {
 			log.Debugf("disable_cooling toggled to %t", cfg.DisableCooling)
 		}
 	}
+	if s.handlers != nil && s.handlers.AuthManager != nil {
+		s.handlers.AuthManager.SetRetryConfig(cfg.RequestRetry, time.Duration(cfg.MaxRetryInterval)*time.Second)
+	}

 	// Update log level dynamically when debug flag changes
 	if oldCfg == nil || oldCfg.Debug != cfg.Debug {
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -23,6 +23,9 @@ type Config struct {
 	// Port is the network port on which the API server will listen.
 	Port int `yaml:"port" json:"-"`

+	// TLS config controls HTTPS server settings.
+	TLS TLSConfig `yaml:"tls" json:"tls"`
+
 	// AmpUpstreamURL defines the upstream Amp control plane used for non-provider calls.
 	AmpUpstreamURL string `yaml:"amp-upstream-url" json:"amp-upstream-url"`

@@ -63,6 +66,8 @@ type Config struct {

 	// RequestRetry defines the retry times when the request failed.
 	RequestRetry int `yaml:"request-retry" json:"request-retry"`
+	// MaxRetryInterval defines the maximum wait time in seconds before retrying a cooled-down credential.
+	MaxRetryInterval int `yaml:"max-retry-interval" json:"max-retry-interval"`

 	// ClaudeKey defines a list of Claude API key configurations as specified in the YAML configuration file.
 	ClaudeKey []ClaudeKey `yaml:"claude-api-key" json:"claude-api-key"`
@@ -80,6 +85,16 @@ type Config struct {
 	Payload PayloadConfig `yaml:"payload" json:"payload"`
 }

+// TLSConfig holds HTTPS server settings.
+type TLSConfig struct {
+	// Enable toggles HTTPS server mode.
+	Enable bool `yaml:"enable" json:"enable"`
+	// Cert is the path to the TLS certificate file.
+	Cert string `yaml:"cert" json:"cert"`
+	// Key is the path to the TLS private key file.
+	Key string `yaml:"key" json:"key"`
+}
+
 // RemoteManagement holds management API configuration under 'remote-management'.
 type RemoteManagement struct {
 	// AllowRemote toggles remote (non-localhost) access to management API.
--- a/internal/logging/request_logger.go
+++ b/internal/logging/request_logger.go
@@ -12,6 +12,7 @@ import (
 	"os"
 	"path/filepath"
 	"regexp"
+	"sort"
 	"strings"
 	"time"

@@ -156,17 +157,30 @@ func (l *FileRequestLogger) SetEnabled(enabled bool) {
 // Returns:
 //   - error: An error if logging fails, nil otherwise
 func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage) error {
-	if !l.enabled {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, false)
+}
+
+// LogRequestWithOptions logs a request with optional forced logging behavior.
+// The force flag allows writing error logs even when regular request logging is disabled.
+func (l *FileRequestLogger) LogRequestWithOptions(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool) error {
+	return l.logRequest(url, method, requestHeaders, body, statusCode, responseHeaders, response, apiRequest, apiResponse, apiResponseErrors, force)
+}
+
+func (l *FileRequestLogger) logRequest(url, method string, requestHeaders map[string][]string, body []byte, statusCode int, responseHeaders map[string][]string, response, apiRequest, apiResponse []byte, apiResponseErrors []*interfaces.ErrorMessage, force bool) error {
+	if !l.enabled && !force {
 		return nil
 	}

 	// Ensure logs directory exists
-	if err := l.ensureLogsDir(); err != nil {
-		return fmt.Errorf("failed to create logs directory: %w", err)
+	if errEnsure := l.ensureLogsDir(); errEnsure != nil {
+		return fmt.Errorf("failed to create logs directory: %w", errEnsure)
 	}

 	// Generate filename
 	filename := l.generateFilename(url)
+	if force && !l.enabled {
+		filename = l.generateErrorFilename(url)
+	}
 	filePath := filepath.Join(l.logsDir, filename)

 	// Decompress response if needed
@@ -184,6 +198,12 @@ func (l *FileRequestLogger) LogRequest(url, method string, requestHeaders map[st
 		return fmt.Errorf("failed to write log file: %w", err)
 	}

+	if force && !l.enabled {
+		if errCleanup := l.cleanupOldErrorLogs(); errCleanup != nil {
+			log.WithError(errCleanup).Warn("failed to clean up old error logs")
+		}
+	}
+
 	return nil
 }

@@ -239,6 +259,11 @@ func (l *FileRequestLogger) LogStreamingRequest(url, method string, headers map[
 	return writer, nil
 }

+// generateErrorFilename prefixes the regular log filename for url with "error-" to mark forced error logs.
+func (l *FileRequestLogger) generateErrorFilename(url string) string {
+	return "error-" + l.generateFilename(url)
+}
+
 // ensureLogsDir creates the logs directory if it doesn't exist.
 //
 // Returns:
@@ -312,6 +337,52 @@ func (l *FileRequestLogger) sanitizeForFilename(path string) string {
 	return sanitized
 }

+// cleanupOldErrorLogs prunes forced error logs ("error-*.log" files directly inside l.logsDir),
+// keeping only the maxErrorLogs most recently modified ones.
+// It returns an error only when the directory cannot be listed; per-file failures are logged.
+func (l *FileRequestLogger) cleanupOldErrorLogs() error {
+	const maxErrorLogs = 10
+
+	entries, errRead := os.ReadDir(l.logsDir)
+	if errRead != nil {
+		return errRead
+	}
+
+	type logFile struct {
+		name    string
+		modTime time.Time
+	}
+
+	var files []logFile
+	for _, entry := range entries {
+		name := entry.Name()
+		if entry.IsDir() || !strings.HasPrefix(name, "error-") || !strings.HasSuffix(name, ".log") {
+			continue
+		}
+		info, errInfo := entry.Info()
+		if errInfo != nil {
+			log.WithError(errInfo).Warn("failed to read error log info")
+			continue
+		}
+		files = append(files, logFile{name: name, modTime: info.ModTime()})
+	}
+	if len(files) <= maxErrorLogs {
+		return nil
+	}
+
+	// Newest first, so everything past the first maxErrorLogs entries is stale.
+	sort.Slice(files, func(i, j int) bool {
+		return files[i].modTime.After(files[j].modTime)
+	})
+	for _, file := range files[maxErrorLogs:] {
+		if errRemove := os.Remove(filepath.Join(l.logsDir, file.name)); errRemove != nil {
+			log.WithError(errRemove).Warnf("failed to remove old error log: %s", file.name)
+		}
+	}
+
+	return nil
+}
+
 // formatLogContent creates the complete log content for non-streaming requests.
 //
 // Parameters:
--- a/internal/registry/model_definitions.go
+++ b/internal/registry/model_definitions.go
@@ -8,60 +8,140 @@ func GetClaudeModels() []*ModelInfo {
 	return []*ModelInfo{

 		{
-			ID:          "claude-haiku-4-5-20251001",
-			Object:      "model",
-			Created:     1759276800, // 2025-10-01
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4.5 Haiku",
+			ID:                  "claude-haiku-4-5-20251001",
+			Object:              "model",
+			Created:             1759276800, // 2025-10-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Haiku",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
 		},
 		{
-			ID:          "claude-sonnet-4-5-20250929",
-			Object:      "model",
-			Created:     1759104000, // 2025-09-29
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4.5 Sonnet",
+			ID:                  "claude-sonnet-4-5-20250929",
+			Object:              "model",
+			Created:             1759104000, // 2025-09-29
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
 		},
 		{
-			ID:          "claude-opus-4-1-20250805",
-			Object:      "model",
-			Created:     1722945600, // 2025-08-05
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4.1 Opus",
+			ID:                  "claude-sonnet-4-5-thinking",
+			Object:              "model",
+			Created:             1759104000, // 2025-09-29
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Sonnet Thinking",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
-			ID:          "claude-opus-4-20250514",
-			Object:      "model",
-			Created:     1715644800, // 2025-05-14
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4 Opus",
+			ID:                  "claude-opus-4-5-thinking",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus Thinking",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
-			ID:          "claude-sonnet-4-20250514",
-			Object:      "model",
-			Created:     1715644800, // 2025-05-14
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 4 Sonnet",
+			ID:                  "claude-opus-4-5-thinking-low",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus Thinking Low",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
-			ID:          "claude-3-7-sonnet-20250219",
-			Object:      "model",
-			Created:     1708300800, // 2025-02-19
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 3.7 Sonnet",
+			ID:                  "claude-opus-4-5-thinking-medium",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus Thinking Medium",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
 		},
 		{
-			ID:          "claude-3-5-haiku-20241022",
-			Object:      "model",
-			Created:     1729555200, // 2024-10-22
-			OwnedBy:     "anthropic",
-			Type:        "claude",
-			DisplayName: "Claude 3.5 Haiku",
+			ID:                  "claude-opus-4-5-thinking-high",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus Thinking High",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+			Thinking:            &ThinkingSupport{Min: 1024, Max: 100000, ZeroAllowed: false, DynamicAllowed: true},
+		},
+		{
+			ID:                  "claude-opus-4-5-20251101",
+			Object:              "model",
+			Created:             1761955200, // 2025-11-01
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.5 Opus",
+			Description:         "Premium model combining maximum intelligence with practical performance",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+		},
+		{
+			ID:                  "claude-opus-4-1-20250805",
+			Object:              "model",
+			Created:             1754352000, // 2025-08-05
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4.1 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+		},
+		{
+			ID:                  "claude-opus-4-20250514",
+			Object:              "model",
+			Created:             1747180800, // 2025-05-14
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Opus",
+			ContextLength:       200000,
+			MaxCompletionTokens: 32000,
+		},
+		{
+			ID:                  "claude-sonnet-4-20250514",
+			Object:              "model",
+			Created:             1747180800, // 2025-05-14
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 4 Sonnet",
+			ContextLength:       200000,
+			MaxCompletionTokens: 64000,
+		},
+		{
+			ID:                  "claude-3-7-sonnet-20250219",
+			Object:              "model",
+			Created:             1739923200, // 2025-02-19
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.7 Sonnet",
+			ContextLength:       128000,
+			MaxCompletionTokens: 8192,
+		},
+		{
+			ID:                  "claude-3-5-haiku-20241022",
+			Object:              "model",
+			Created:             1729555200, // 2024-10-22
+			OwnedBy:             "anthropic",
+			Type:                "claude",
+			DisplayName:         "Claude 3.5 Haiku",
+			ContextLength:       128000,
+			MaxCompletionTokens: 8192,
 		},
 	}
 }
@@ -129,6 +209,20 @@ func GetGeminiModels() []*ModelInfo {
 			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
 			Thinking:                   &ThinkingSupport{Min: 128, Max: 32768, ZeroAllowed: false, DynamicAllowed: true},
 		},
+		{
+			ID:                         "gemini-3-pro-image-preview",
+			Object:                     "model",
+			Created:                    1737158400,
+			OwnedBy:                    "google",
+			Type:                       "gemini",
+			Name:                       "models/gemini-3-pro-image-preview",
+			Version:                    "3.0",
+			DisplayName:                "Gemini 3 Pro Image Preview",
+			Description:                "Gemini 3 Pro Image Preview",
+			InputTokenLimit:            1048576,
+			OutputTokenLimit:           65536,
+			SupportedGenerationMethods: []string{"generateContent", "countTokens", "createCachedContent", "batchGenerateContent"},
+		},
 	}
 }

--- a/internal/registry/model_registry.go
+++ b/internal/registry/model_registry.go
@@ -826,7 +826,6 @@ func (r *ModelRegistry) CleanupExpiredQuotas() {
 	}
 }

-
 // GetFirstAvailableModel returns the first available model for the given handler type.
 // It prioritizes models by their creation timestamp (newest first) and checks if they have
 // available clients that are not suspended or over quota.
--- a/internal/runtime/executor/antigravity_executor.go
+++ b/internal/runtime/executor/antigravity_executor.go
@@ -365,13 +365,27 @@ func FetchAntigravityModels(ctx context.Context, auth *cliproxyauth.Auth, cfg *c
 		for id := range result.Map() {
 			id = modelName2Alias(id)
 			if id != "" {
-				models = append(models, &registry.ModelInfo{
-					ID:      id,
-					Object:  "model",
-					Created: now,
-					OwnedBy: antigravityAuthType,
-					Type:    antigravityAuthType,
-				})
+				modelInfo := &registry.ModelInfo{
+					ID:          id,
+					Name:        id,
+					Description: id,
+					DisplayName: id,
+					Version:     id,
+					Object:      "model",
+					Created:     now,
+					OwnedBy:     antigravityAuthType,
+					Type:        antigravityAuthType,
+				}
+				// Add Thinking support for thinking models
+				if strings.HasSuffix(id, "-thinking") || strings.Contains(id, "-thinking-") {
+					modelInfo.Thinking = &registry.ThinkingSupport{
+						Min:            1024,
+						Max:            100000,
+						ZeroAllowed:    false,
+						DynamicAllowed: true,
+					}
+				}
+				models = append(models, modelInfo)
 			}
 		}
 		return models
@@ -669,20 +683,6 @@ func geminiToAntigravity(modelName string, payload []byte) []byte {
 		}
 	}

-	gjson.Get(template, "request.contents").ForEach(func(key, content gjson.Result) bool {
-		if content.Get("role").String() == "model" {
-			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
-				if part.Get("functionCall").Exists() {
-					template, _ = sjson.Set(template, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
-				} else if part.Get("thoughtSignature").Exists() {
-					template, _ = sjson.Set(template, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
-				}
-				return true
-			})
-		}
-		return true
-	})
-
 	if strings.HasPrefix(modelName, "claude-sonnet-") {
 		gjson.Get(template, "request.tools").ForEach(func(key, tool gjson.Result) bool {
 			tool.Get("functionDeclarations").ForEach(func(funKey, funcDecl gjson.Result) bool {
--- a/internal/runtime/executor/claude_executor.go
+++ b/internal/runtime/executor/claude_executor.go
@@ -17,6 +17,7 @@ import (
 	claudeauth "github.com/router-for-me/CLIProxyAPI/v6/internal/auth/claude"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/config"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/misc"
+	"github.com/router-for-me/CLIProxyAPI/v6/internal/registry"
 	"github.com/router-for-me/CLIProxyAPI/v6/internal/util"
 	cliproxyauth "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/auth"
 	cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/executor"
@@ -58,18 +59,27 @@ func (e *ClaudeExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, r
 		body, _ = sjson.SetBytes(body, "model", modelOverride)
 		modelForUpstream = modelOverride
 	}
+	// Inject thinking config based on model suffix for thinking variants
+	body = e.injectThinkingConfig(req.Model, body)

 	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
-		body, _ = sjson.SetRawBytes(body, "system", []byte(misc.ClaudeCodeInstructions))
+		body = checkSystemInstructions(body)
 	}
 	body = applyPayloadConfig(e.cfg, req.Model, body)

+	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
+	body = ensureMaxTokensForThinking(req.Model, body)
+
+	// Extract betas from body and convert to header
+	var extraBetas []string
+	extraBetas, body = extractAndRemoveBetas(body)
+
 	url := fmt.Sprintf("%s/v1/messages?beta=true", baseURL)
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return resp, err
 	}
-	applyClaudeHeaders(httpReq, auth, apiKey, false)
+	applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -154,15 +164,24 @@ func (e *ClaudeExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.A
 	if modelOverride := e.resolveUpstreamModel(req.Model, auth); modelOverride != "" {
 		body, _ = sjson.SetBytes(body, "model", modelOverride)
 	}
-	body, _ = sjson.SetRawBytes(body, "system", []byte(misc.ClaudeCodeInstructions))
+	// Inject thinking config based on model suffix for thinking variants
+	body = e.injectThinkingConfig(req.Model, body)
+	body = checkSystemInstructions(body)
 	body = applyPayloadConfig(e.cfg, req.Model, body)

+	// Ensure max_tokens > thinking.budget_tokens when thinking is enabled
+	body = ensureMaxTokensForThinking(req.Model, body)
+
+	// Extract betas from body and convert to header
+	var extraBetas []string
+	extraBetas, body = extractAndRemoveBetas(body)
+
 	url := fmt.Sprintf("%s/v1/messages?beta=true", baseURL)
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return nil, err
 	}
-	applyClaudeHeaders(httpReq, auth, apiKey, true)
+	applyClaudeHeaders(httpReq, auth, apiKey, true, extraBetas)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -283,15 +302,19 @@ func (e *ClaudeExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.Aut
 	}

 	if !strings.HasPrefix(modelForUpstream, "claude-3-5-haiku") {
-		body, _ = sjson.SetRawBytes(body, "system", []byte(misc.ClaudeCodeInstructions))
+		body = checkSystemInstructions(body)
 	}

+	// Extract betas from body and convert to header (for count_tokens too)
+	var extraBetas []string
+	extraBetas, body = extractAndRemoveBetas(body)
+
 	url := fmt.Sprintf("%s/v1/messages/count_tokens?beta=true", baseURL)
 	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body))
 	if err != nil {
 		return cliproxyexecutor.Response{}, err
 	}
-	applyClaudeHeaders(httpReq, auth, apiKey, false)
+	applyClaudeHeaders(httpReq, auth, apiKey, false, extraBetas)
 	var authID, authLabel, authType, authValue string
 	if auth != nil {
 		authID = auth.ID
@@ -383,10 +406,101 @@ func (e *ClaudeExecutor) Refresh(ctx context.Context, auth *cliproxyauth.Auth) (
 	return auth, nil
 }

+// extractAndRemoveBetas pulls the top-level "betas" field out of a request body.
+// It returns the trimmed, non-empty beta names (array elements, or one scalar) and the body without the field.
+func extractAndRemoveBetas(body []byte) ([]string, []byte) {
+	field := gjson.GetBytes(body, "betas")
+	if !field.Exists() {
+		return nil, body
+	}
+	candidates := []gjson.Result{field}
+	if field.IsArray() {
+		candidates = field.Array()
+	}
+	var betas []string
+	for _, candidate := range candidates {
+		if name := strings.TrimSpace(candidate.String()); name != "" {
+			betas = append(betas, name)
+		}
+	}
+	body, _ = sjson.DeleteBytes(body, "betas")
+	return betas, body
+}
+
+// injectThinkingConfig enables thinking for model names ending in a "-thinking" suffix variant.
+// The budget is chosen by suffix; a body that already contains "thinking" is returned untouched.
+func (e *ClaudeExecutor) injectThinkingConfig(modelName string, body []byte) []byte {
+	if gjson.GetBytes(body, "thinking").Exists() {
+		return body
+	}
+
+	// Suffix -> budget_tokens; a bare "-thinking" uses the medium budget.
+	budgets := []struct {
+		suffix string
+		tokens int
+	}{
+		{"-thinking-low", 1024},
+		{"-thinking-medium", 8192},
+		{"-thinking-high", 24576},
+		{"-thinking", 8192},
+	}
+	for _, b := range budgets {
+		if strings.HasSuffix(modelName, b.suffix) {
+			body, _ = sjson.SetBytes(body, "thinking.type", "enabled")
+			body, _ = sjson.SetBytes(body, "thinking.budget_tokens", b.tokens)
+			return body
+		}
+	}
+	return body
+}
+
+// ensureMaxTokensForThinking raises max_tokens when thinking is enabled and max_tokens does not
+// exceed thinking.budget_tokens; the Anthropic API answers such requests with a 400 error.
+// It must run after the thinking configuration is final. A max_tokens value that already
+// satisfies the constraint is kept exactly as the caller sent it.
+// modelName is the key used to look up MaxCompletionTokens in the global model registry.
+func ensureMaxTokensForThinking(modelName string, body []byte) []byte {
+	if gjson.GetBytes(body, "thinking.type").String() != "enabled" {
+		return body
+	}
+
+	budgetTokens := gjson.GetBytes(body, "thinking.budget_tokens").Int()
+	if budgetTokens <= 0 {
+		return body
+	}
+
+	// A missing max_tokens reads as 0 and is therefore always repaired.
+	if gjson.GetBytes(body, "max_tokens").Int() > budgetTokens {
+		return body
+	}
+
+	// Default: budget plus a buffer, so the visible answer has room beyond the thinking budget.
+	const fallbackBuffer = 4000
+	requiredMaxTokens := budgetTokens + fallbackBuffer
+
+	// Prefer the model's registered output cap, but only when it actually exceeds the budget;
+	// a cap at or below the budget would leave the request invalid.
+	if modelInfo := registry.GetGlobalRegistry().GetModelInfo(modelName); modelInfo != nil {
+		if maxCompletionTokens := int64(modelInfo.MaxCompletionTokens); maxCompletionTokens > budgetTokens {
+			requiredMaxTokens = maxCompletionTokens
+		}
+	}
+
+	body, _ = sjson.SetBytes(body, "max_tokens", requiredMaxTokens)
+	return body
+}
+
 func (e *ClaudeExecutor) resolveUpstreamModel(alias string, auth *cliproxyauth.Auth) string {
 	if alias == "" {
 		return ""
 	}
+	// Hardcoded mappings for thinking models to actual Claude model names
+	switch alias {
+	case "claude-opus-4-5-thinking", "claude-opus-4-5-thinking-low", "claude-opus-4-5-thinking-medium", "claude-opus-4-5-thinking-high":
+		return "claude-opus-4-5-20251101"
+	case "claude-sonnet-4-5-thinking":
+		return "claude-sonnet-4-5-20250929"
+	}
 	entry := e.resolveClaudeConfig(auth)
 	if entry == nil {
 		return ""
@@ -530,7 +644,7 @@ func decodeResponseBody(body io.ReadCloser, contentEncoding string) (io.ReadClos
 	return body, nil
 }

-func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, stream bool) {
+func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string, stream bool, extraBetas []string) {
 	r.Header.Set("Authorization", "Bearer "+apiKey)
 	r.Header.Set("Content-Type", "application/json")

@@ -539,15 +653,30 @@ func applyClaudeHeaders(r *http.Request, auth *cliproxyauth.Auth, apiKey string,
 		ginHeaders = ginCtx.Request.Header
 	}

+	baseBetas := "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14"
 	if val := strings.TrimSpace(ginHeaders.Get("Anthropic-Beta")); val != "" {
+		baseBetas = val
 		if !strings.Contains(val, "oauth") {
-			val += ",oauth-2025-04-20"
+			baseBetas += ",oauth-2025-04-20"
 		}
-		r.Header.Set("Anthropic-Beta", val)
-	} else {
-		r.Header.Set("Anthropic-Beta", "claude-code-20250219,oauth-2025-04-20,interleaved-thinking-2025-05-14,fine-grained-tool-streaming-2025-05-14")
 	}

+	// Merge extra betas from request body
+	if len(extraBetas) > 0 {
+		existingSet := make(map[string]bool)
+		for _, b := range strings.Split(baseBetas, ",") {
+			existingSet[strings.TrimSpace(b)] = true
+		}
+		for _, beta := range extraBetas {
+			beta = strings.TrimSpace(beta)
+			if beta != "" && !existingSet[beta] {
+				baseBetas += "," + beta
+				existingSet[beta] = true
+			}
+		}
+	}
+	r.Header.Set("Anthropic-Beta", baseBetas)
+
 	misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Version", "2023-06-01")
 	misc.EnsureHeader(r.Header, ginHeaders, "Anthropic-Dangerous-Direct-Browser-Access", "true")
 	misc.EnsureHeader(r.Header, ginHeaders, "X-App", "cli")
@@ -590,3 +719,31 @@ func claudeCreds(a *cliproxyauth.Auth) (apiKey, baseURL string) {
 	}
 	return
 }
+
+// checkSystemInstructions makes the Claude Code identity prompt the first block of "system".
+// Text blocks of an existing array are kept after it, and a plain-string system prompt is
+// converted into a trailing text block rather than being dropped.
+// A payload whose first system block already carries the identity text is returned unchanged.
+func checkSystemInstructions(payload []byte) []byte {
+	const identity = "You are Claude Code, Anthropic's official CLI for Claude."
+	system := gjson.GetBytes(payload, "system")
+	if system.IsArray() && system.Get("0.text").String() == identity {
+		return payload
+	}
+
+	claudeCodeInstructions := `[{"type":"text","text":"You are Claude Code, Anthropic's official CLI for Claude."}]`
+	switch {
+	case system.IsArray():
+		system.ForEach(func(_, part gjson.Result) bool {
+			if part.Get("type").String() == "text" {
+				claudeCodeInstructions, _ = sjson.SetRaw(claudeCodeInstructions, "-1", part.Raw)
+			}
+			return true
+		})
+	case system.Type == gjson.String && strings.TrimSpace(system.String()) != "":
+		// Keep the caller's prompt instead of silently replacing it.
+		claudeCodeInstructions, _ = sjson.Set(claudeCodeInstructions, "-1", map[string]string{"type": "text", "text": system.String()})
+	}
+	payload, _ = sjson.SetRawBytes(payload, "system", []byte(claudeCodeInstructions))
+	return payload
+}
--- a/internal/translator/antigravity/gemini/antigravity_gemini_request.go
+++ b/internal/translator/antigravity/gemini/antigravity_gemini_request.go
@@ -98,6 +98,20 @@ func ConvertGeminiRequestToAntigravity(_ string, inputRawJSON []byte, _ bool) []
 		}
 	}

+	gjson.GetBytes(rawJSON, "request.contents").ForEach(func(key, content gjson.Result) bool {
+		if content.Get("role").String() == "model" {
+			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
+				if part.Get("functionCall").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				} else if part.Get("thoughtSignature").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				}
+				return true
+			})
+		}
+		return true
+	})
+
 	return common.AttachDefaultSafetySettings(rawJSON, "request.safetySettings")
 }

--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_request.go
@@ -271,7 +271,15 @@ func ConvertOpenAIRequestToAntigravity(modelName string, inputRawJSON []byte, _
 								if resp == "" {
 									resp = "{}"
 								}
-								toolNode, _ = sjson.SetBytes(toolNode, "parts."+itoa(pp)+".functionResponse.response.result", []byte(resp))
+								// Handle non-JSON output gracefully (matches dev branch approach)
+								if resp != "null" {
+									parsed := gjson.Parse(resp)
+									if parsed.Type == gjson.JSON {
+										toolNode, _ = sjson.SetRawBytes(toolNode, "parts."+itoa(pp)+".functionResponse.response.result", []byte(parsed.Raw))
+									} else {
+										toolNode, _ = sjson.SetBytes(toolNode, "parts."+itoa(pp)+".functionResponse.response.result", resp)
+									}
+								}
 								pp++
 							}
 						}
--- a/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
+++ b/internal/translator/antigravity/openai/chat-completions/antigravity_openai_response.go
@@ -105,14 +105,19 @@ func ConvertAntigravityResponseToOpenAI(_ context.Context, _ string, originalReq
 			partTextResult := partResult.Get("text")
 			functionCallResult := partResult.Get("functionCall")
 			thoughtSignatureResult := partResult.Get("thoughtSignature")
+			if !thoughtSignatureResult.Exists() {
+				thoughtSignatureResult = partResult.Get("thought_signature")
+			}
 			inlineDataResult := partResult.Get("inlineData")
 			if !inlineDataResult.Exists() {
 				inlineDataResult = partResult.Get("inline_data")
 			}

-			// Handle thoughtSignature - this is encrypted reasoning content that should not be exposed to the client
-			if thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != "" {
-				// Skip thoughtSignature processing - it's internal encrypted data
+			hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
+			hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+
+			// Ignore encrypted thoughtSignature but keep any actual content in the same part.
+			if hasThoughtSignature && !hasContentPayload {
 				continue
 			}

--- a/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
+++ b/internal/translator/gemini-cli/gemini/gemini-cli_gemini_request.go
@@ -98,6 +98,20 @@ func ConvertGeminiRequestToGeminiCLI(_ string, inputRawJSON []byte, _ bool) []by
 		}
 	}

+	gjson.GetBytes(rawJSON, "request.contents").ForEach(func(key, content gjson.Result) bool {
+		if content.Get("role").String() == "model" {
+			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
+				if part.Get("functionCall").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				} else if part.Get("thoughtSignature").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("request.contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				}
+				return true
+			})
+		}
+		return true
+	})
+
 	return common.AttachDefaultSafetySettings(rawJSON, "request.safetySettings")
 }

--- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
+++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_response.go
@@ -105,14 +105,19 @@ func ConvertCliResponseToOpenAI(_ context.Context, _ string, originalRequestRawJ
 			partTextResult := partResult.Get("text")
 			functionCallResult := partResult.Get("functionCall")
 			thoughtSignatureResult := partResult.Get("thoughtSignature")
+			if !thoughtSignatureResult.Exists() {
+				thoughtSignatureResult = partResult.Get("thought_signature")
+			}
 			inlineDataResult := partResult.Get("inlineData")
 			if !inlineDataResult.Exists() {
 				inlineDataResult = partResult.Get("inline_data")
 			}

-			// Handle thoughtSignature - this is encrypted reasoning content that should not be exposed to the client
-			if thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != "" {
-				// Skip thoughtSignature processing - it's internal encrypted data
+			hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
+			hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+
+			// Ignore encrypted thoughtSignature but keep any actual content in the same part.
+			if hasThoughtSignature && !hasContentPayload {
 				continue
 			}

--- a/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
+++ b/internal/translator/gemini/gemini-cli/gemini_gemini-cli_request.go
@@ -46,5 +46,19 @@ func ConvertGeminiCLIRequestToGemini(_ string, inputRawJSON []byte, _ bool) []by
 		}
 	}

+	gjson.GetBytes(rawJSON, "contents").ForEach(func(key, content gjson.Result) bool {
+		if content.Get("role").String() == "model" {
+			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
+				if part.Get("functionCall").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				} else if part.Get("thoughtSignature").Exists() {
+					rawJSON, _ = sjson.SetBytes(rawJSON, fmt.Sprintf("contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				}
+				return true
+			})
+		}
+		return true
+	})
+
 	return common.AttachDefaultSafetySettings(rawJSON, "safetySettings")
 }
--- a/internal/translator/gemini/gemini/gemini_gemini_request.go
+++ b/internal/translator/gemini/gemini/gemini_gemini_request.go
@@ -30,6 +30,11 @@ func ConvertGeminiRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte
 	if toolsResult.Exists() && toolsResult.IsArray() {
 		toolResults := toolsResult.Array()
 		for i := 0; i < len(toolResults); i++ {
+			if gjson.GetBytes(rawJSON, fmt.Sprintf("tools.%d.functionDeclarations", i)).Exists() {
+				strJson, _ := util.RenameKey(string(rawJSON), fmt.Sprintf("tools.%d.functionDeclarations", i), fmt.Sprintf("tools.%d.function_declarations", i))
+				rawJSON = []byte(strJson)
+			}
+
 			functionDeclarationsResult := gjson.GetBytes(rawJSON, fmt.Sprintf("tools.%d.function_declarations", i))
 			if functionDeclarationsResult.Exists() && functionDeclarationsResult.IsArray() {
 				functionDeclarationsResults := functionDeclarationsResult.Array()
@@ -72,7 +77,20 @@ func ConvertGeminiRequestToGemini(_ string, inputRawJSON []byte, _ bool) []byte
 		return true
 	})

-	out = common.AttachDefaultSafetySettings(out, "safetySettings")
+	gjson.GetBytes(out, "contents").ForEach(func(key, content gjson.Result) bool {
+		if content.Get("role").String() == "model" {
+			content.Get("parts").ForEach(func(partKey, part gjson.Result) bool {
+				if part.Get("functionCall").Exists() {
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				} else if part.Get("thoughtSignature").Exists() {
+					out, _ = sjson.SetBytes(out, fmt.Sprintf("contents.%d.parts.%d.thoughtSignature", key.Int(), partKey.Int()), "skip_thought_signature_validator")
+				}
+				return true
+			})
+		}
+		return true
+	})

+	out = common.AttachDefaultSafetySettings(out, "safetySettings")
 	return out
 }
--- a/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
+++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_response.go
@@ -116,8 +116,11 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
 				thoughtSignatureResult = partResult.Get("thought_signature")
 			}

-			// Skip thoughtSignature parts (encrypted reasoning not exposed downstream).
-			if thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != "" {
+			hasThoughtSignature := thoughtSignatureResult.Exists() && thoughtSignatureResult.String() != ""
+			hasContentPayload := partTextResult.Exists() || functionCallResult.Exists() || inlineDataResult.Exists()
+
+			// Skip pure thoughtSignature parts but keep any actual payload in the same part.
+			if hasThoughtSignature && !hasContentPayload {
 				continue
 			}

--- a/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
+++ b/internal/translator/gemini/openai/responses/gemini_openai-responses_request.go
@@ -33,7 +33,83 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte

 	// Convert input messages to Gemini contents format
 	if input := root.Get("input"); input.Exists() && input.IsArray() {
-		input.ForEach(func(_, item gjson.Result) bool {
+		items := input.Array()
+
+		// Normalize consecutive function calls and outputs so each call is immediately followed by its response
+		normalized := make([]gjson.Result, 0, len(items))
+		for i := 0; i < len(items); {
+			item := items[i]
+			itemType := item.Get("type").String()
+			itemRole := item.Get("role").String()
+			if itemType == "" && itemRole != "" {
+				itemType = "message"
+			}
+
+			if itemType == "function_call" {
+				var calls []gjson.Result
+				var outputs []gjson.Result
+
+				for i < len(items) {
+					next := items[i]
+					nextType := next.Get("type").String()
+					nextRole := next.Get("role").String()
+					if nextType == "" && nextRole != "" {
+						nextType = "message"
+					}
+					if nextType != "function_call" {
+						break
+					}
+					calls = append(calls, next)
+					i++
+				}
+
+				for i < len(items) {
+					next := items[i]
+					nextType := next.Get("type").String()
+					nextRole := next.Get("role").String()
+					if nextType == "" && nextRole != "" {
+						nextType = "message"
+					}
+					if nextType != "function_call_output" {
+						break
+					}
+					outputs = append(outputs, next)
+					i++
+				}
+
+				if len(calls) > 0 {
+					outputMap := make(map[string]gjson.Result, len(outputs))
+					for _, out := range outputs {
+						outputMap[out.Get("call_id").String()] = out
+					}
+					for _, call := range calls {
+						normalized = append(normalized, call)
+						callID := call.Get("call_id").String()
+						if resp, ok := outputMap[callID]; ok {
+							normalized = append(normalized, resp)
+							delete(outputMap, callID)
+						}
+					}
+					for _, out := range outputs {
+						if _, ok := outputMap[out.Get("call_id").String()]; ok {
+							normalized = append(normalized, out)
+						}
+					}
+					continue
+				}
+			}
+
+			if itemType == "function_call_output" {
+				normalized = append(normalized, item)
+				i++
+				continue
+			}
+
+			normalized = append(normalized, item)
+			i++
+		}
+
+		for _, item := range normalized {
 			itemType := item.Get("type").String()
 			itemRole := item.Get("role").String()
 			if itemType == "" && itemRole != "" {
@@ -59,7 +135,7 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 							out, _ = sjson.SetRaw(out, "system_instruction", systemInstr)
 						}
 					}
-					return true
+					continue
 				}

 				// Handle regular messages
@@ -186,7 +262,8 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte
 			case "function_call_output":
 				// Handle function call outputs - convert to function message with functionResponse
 				callID := item.Get("call_id").String()
-				output := item.Get("output").String()
+				// Use .Str to read the output as a decoded string; it may hold JSON text or plain (non-JSON) text
+				outputRaw := item.Get("output").Str

 				functionContent := `{"role":"function","parts":[]}`
 				functionResponse := `{"functionResponse":{"name":"","response":{}}}`
@@ -209,18 +286,19 @@ func ConvertOpenAIResponsesRequestToGemini(modelName string, inputRawJSON []byte

 				functionResponse, _ = sjson.Set(functionResponse, "functionResponse.name", functionName)

-				// Parse output JSON string and set as response content
-				if output != "" {
-					outputResult := gjson.Parse(output)
-					functionResponse, _ = sjson.Set(functionResponse, "functionResponse.response.result", outputResult.Raw)
+				// Embed the output as raw JSON when it parses as gjson.JSON; otherwise store it as a plain string
+				if outputRaw != "" && outputRaw != "null" {
+					output := gjson.Parse(outputRaw)
+					if output.Type == gjson.JSON {
+						functionResponse, _ = sjson.SetRaw(functionResponse, "functionResponse.response.result", output.Raw)
+					} else {
+						functionResponse, _ = sjson.Set(functionResponse, "functionResponse.response.result", outputRaw)
+					}
 				}
-
 				functionContent, _ = sjson.SetRaw(functionContent, "parts.-1", functionResponse)
 				out, _ = sjson.SetRaw(out, "contents.-1", functionContent)
 			}
-
-			return true
-		})
+		}
 	} else if input.Exists() && input.Type == gjson.String {
 		// Simple string input conversion to user message
 		userContent := `{"role":"user","parts":[{"text":""}]}`
--- a/internal/translator/openai/openai/responses/openai_openai-responses_request.go
+++ b/internal/translator/openai/openai/responses/openai_openai-responses_request.go
@@ -202,6 +202,8 @@ func ConvertOpenAIResponsesRequestToOpenAIChatCompletions(modelName string, inpu
 			out, _ = sjson.Set(out, "reasoning_effort", "medium")
 		case "high":
 			out, _ = sjson.Set(out, "reasoning_effort", "high")
+		case "xhigh":
+			out, _ = sjson.Set(out, "reasoning_effort", "xhigh")
 		default:
 			out, _ = sjson.Set(out, "reasoning_effort", "auto")
 		}
--- a/internal/watcher/watcher.go
+++ b/internal/watcher/watcher.go
@@ -1419,6 +1419,9 @@ func buildConfigChangeDetails(oldCfg, newCfg *config.Config) []string {
 	if oldCfg.RequestRetry != newCfg.RequestRetry {
 		changes = append(changes, fmt.Sprintf("request-retry: %d -> %d", oldCfg.RequestRetry, newCfg.RequestRetry))
 	}
+	if oldCfg.MaxRetryInterval != newCfg.MaxRetryInterval {
+		changes = append(changes, fmt.Sprintf("max-retry-interval: %d -> %d", oldCfg.MaxRetryInterval, newCfg.MaxRetryInterval))
+	}
 	if oldCfg.ProxyURL != newCfg.ProxyURL {
 		changes = append(changes, fmt.Sprintf("proxy-url: %s -> %s", oldCfg.ProxyURL, newCfg.ProxyURL))
 	}
--- a/sdk/api/handlers/gemini/gemini_handlers.go
+++ b/sdk/api/handlers/gemini/gemini_handlers.go
@@ -69,6 +69,27 @@ func (h *GeminiAPIHandler) GeminiGetHandler(c *gin.Context) {
 		return
 	}
 	switch request.Action {
+	case "gemini-3-pro-preview":
+		c.JSON(http.StatusOK, gin.H{
+			"name":             "models/gemini-3-pro-preview",
+			"version":          "3",
+			"displayName":      "Gemini 3 Pro Preview",
+			"description":      "Gemini 3 Pro Preview",
+			"inputTokenLimit":  1048576,
+			"outputTokenLimit": 65536,
+			"supportedGenerationMethods": []string{
+				"generateContent",
+				"countTokens",
+				"createCachedContent",
+				"batchGenerateContent",
+			},
+			"temperature":    1,
+			"topP":           0.95,
+			"topK":           64,
+			"maxTemperature": 2,
+			"thinking":       true,
+		},
+		)
 	case "gemini-2.5-pro":
 		c.JSON(http.StatusOK, gin.H{
 			"name":             "models/gemini-2.5-pro",
--- a/sdk/api/handlers/handlers.go
+++ b/sdk/api/handlers/handlers.go
@@ -4,6 +4,7 @@
 package handlers

 import (
+	"bytes"
 	"fmt"
 	"net/http"
 	"strings"
@@ -120,11 +121,11 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
 				data := params[0]
 				switch data.(type) {
 				case []byte:
-					c.Set("API_RESPONSE", data.([]byte))
+					appendAPIResponse(c, data.([]byte))
 				case error:
-					c.Set("API_RESPONSE", []byte(data.(error).Error()))
+					appendAPIResponse(c, []byte(data.(error).Error()))
 				case string:
-					c.Set("API_RESPONSE", []byte(data.(string)))
+					appendAPIResponse(c, []byte(data.(string)))
 				case bool:
 				case nil:
 				}
@@ -135,6 +136,28 @@ func (h *BaseAPIHandler) GetContextWithCancel(handler interfaces.APIHandler, c *
 	}
 }

+// appendAPIResponse preserves any previously captured API response and appends new data.
+func appendAPIResponse(c *gin.Context, data []byte) {
+	if c == nil || len(data) == 0 {
+		return
+	}
+
+	if existing, exists := c.Get("API_RESPONSE"); exists {
+		if existingBytes, ok := existing.([]byte); ok && len(existingBytes) > 0 {
+			combined := make([]byte, 0, len(existingBytes)+len(data)+1)
+			combined = append(combined, existingBytes...)
+			if existingBytes[len(existingBytes)-1] != '\n' {
+				combined = append(combined, '\n')
+			}
+			combined = append(combined, data...)
+			c.Set("API_RESPONSE", combined)
+			return
+		}
+	}
+
+	c.Set("API_RESPONSE", bytes.Clone(data))
+}
+
 // ExecuteWithAuthManager executes a non-streaming request via the core auth manager.
 // This path is the only supported execution route.
 func (h *BaseAPIHandler) ExecuteWithAuthManager(ctx context.Context, handlerType, modelName string, rawJSON []byte, alt string) ([]byte, *interfaces.ErrorMessage) {
@@ -297,7 +320,7 @@ func (h *BaseAPIHandler) ExecuteStreamWithAuthManager(ctx context.Context, handl
 func (h *BaseAPIHandler) getRequestDetails(modelName string) (providers []string, normalizedModel string, metadata map[string]any, err *interfaces.ErrorMessage) {
 	// Resolve "auto" model to an actual available model first
 	resolvedModelName := util.ResolveAutoModel(modelName)
-	
+
 	providerName, extractedModelName, isDynamic := h.parseDynamicModel(resolvedModelName)

 	// First, normalize the model name to handle suffixes like "-thinking-128"
--- a/sdk/cliproxy/auth/manager.go
+++ b/sdk/cliproxy/auth/manager.go
@@ -106,6 +106,10 @@ type Manager struct {
 	// providerOffsets tracks per-model provider rotation state for multi-provider routing.
 	providerOffsets map[string]int

+	// Retry settings: requestRetry is the retry count; maxRetryInterval is the maximum cooldown wait, stored in nanoseconds.
+	requestRetry     atomic.Int32
+	maxRetryInterval atomic.Int64
+
 	// Optional HTTP RoundTripper provider injected by host.
 	rtProvider RoundTripperProvider

@@ -145,6 +149,21 @@ func (m *Manager) SetRoundTripperProvider(p RoundTripperProvider) {
 	m.mu.Unlock()
 }

+// SetRetryConfig updates retry attempts and cooldown wait interval.
+func (m *Manager) SetRetryConfig(retry int, maxRetryInterval time.Duration) {
+	if m == nil {
+		return
+	}
+	if retry < 0 {
+		retry = 0
+	}
+	if maxRetryInterval < 0 {
+		maxRetryInterval = 0
+	}
+	m.requestRetry.Store(int32(retry))
+	m.maxRetryInterval.Store(maxRetryInterval.Nanoseconds())
+}
+
 // RegisterExecutor registers a provider executor with the manager.
 func (m *Manager) RegisterExecutor(executor ProviderExecutor) {
 	if executor == nil {
@@ -188,8 +207,12 @@ func (m *Manager) Update(ctx context.Context, auth *Auth) (*Auth, error) {
 	if auth == nil || auth.ID == "" {
 		return nil, nil
 	}
-	auth.EnsureIndex()
 	m.mu.Lock()
+	if existing, ok := m.auths[auth.ID]; ok && existing != nil && !auth.indexAssigned && auth.Index == 0 {
+		auth.Index = existing.Index
+		auth.indexAssigned = existing.indexAssigned
+	}
+	auth.EnsureIndex()
 	m.auths[auth.ID] = auth.Clone()
 	m.mu.Unlock()
 	_ = m.persist(ctx, auth)
@@ -229,13 +252,28 @@ func (m *Manager) Execute(ctx context.Context, providers []string, req cliproxye
 	rotated := m.rotateProviders(req.Model, normalized)
 	defer m.advanceProviderCursor(req.Model, normalized)

+	retryTimes, maxWait := m.retrySettings()
+	attempts := retryTimes + 1
+	if attempts < 1 {
+		attempts = 1
+	}
+
 	var lastErr error
-	for _, provider := range rotated {
-		resp, errExec := m.executeWithProvider(ctx, provider, req, opts)
+	for attempt := 0; attempt < attempts; attempt++ {
+		resp, errExec := m.executeProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (cliproxyexecutor.Response, error) {
+			return m.executeWithProvider(execCtx, provider, req, opts)
+		})
 		if errExec == nil {
 			return resp, nil
 		}
 		lastErr = errExec
+		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, rotated, req.Model, maxWait)
+		if !shouldRetry {
+			break
+		}
+		if errWait := waitForCooldown(ctx, wait); errWait != nil {
+			return cliproxyexecutor.Response{}, errWait
+		}
 	}
 	if lastErr != nil {
 		return cliproxyexecutor.Response{}, lastErr
@@ -253,13 +291,28 @@ func (m *Manager) ExecuteCount(ctx context.Context, providers []string, req clip
 	rotated := m.rotateProviders(req.Model, normalized)
 	defer m.advanceProviderCursor(req.Model, normalized)

+	retryTimes, maxWait := m.retrySettings()
+	attempts := retryTimes + 1
+	if attempts < 1 {
+		attempts = 1
+	}
+
 	var lastErr error
-	for _, provider := range rotated {
-		resp, errExec := m.executeCountWithProvider(ctx, provider, req, opts)
+	for attempt := 0; attempt < attempts; attempt++ {
+		resp, errExec := m.executeProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (cliproxyexecutor.Response, error) {
+			return m.executeCountWithProvider(execCtx, provider, req, opts)
+		})
 		if errExec == nil {
 			return resp, nil
 		}
 		lastErr = errExec
+		wait, shouldRetry := m.shouldRetryAfterError(errExec, attempt, attempts, rotated, req.Model, maxWait)
+		if !shouldRetry {
+			break
+		}
+		if errWait := waitForCooldown(ctx, wait); errWait != nil {
+			return cliproxyexecutor.Response{}, errWait
+		}
 	}
 	if lastErr != nil {
 		return cliproxyexecutor.Response{}, lastErr
@@ -277,13 +330,28 @@ func (m *Manager) ExecuteStream(ctx context.Context, providers []string, req cli
 	rotated := m.rotateProviders(req.Model, normalized)
 	defer m.advanceProviderCursor(req.Model, normalized)

+	retryTimes, maxWait := m.retrySettings()
+	attempts := retryTimes + 1
+	if attempts < 1 {
+		attempts = 1
+	}
+
 	var lastErr error
-	for _, provider := range rotated {
-		chunks, errStream := m.executeStreamWithProvider(ctx, provider, req, opts)
+	for attempt := 0; attempt < attempts; attempt++ {
+		chunks, errStream := m.executeStreamProvidersOnce(ctx, rotated, func(execCtx context.Context, provider string) (<-chan cliproxyexecutor.StreamChunk, error) {
+			return m.executeStreamWithProvider(execCtx, provider, req, opts)
+		})
 		if errStream == nil {
 			return chunks, nil
 		}
 		lastErr = errStream
+		wait, shouldRetry := m.shouldRetryAfterError(errStream, attempt, attempts, rotated, req.Model, maxWait)
+		if !shouldRetry {
+			break
+		}
+		if errWait := waitForCooldown(ctx, wait); errWait != nil {
+			return nil, errWait
+		}
 	}
 	if lastErr != nil {
 		return nil, lastErr
@@ -507,6 +575,123 @@ func (m *Manager) advanceProviderCursor(model string, providers []string) {
 	m.mu.Unlock()
 }

+func (m *Manager) retrySettings() (int, time.Duration) {
+	if m == nil {
+		return 0, 0
+	}
+	return int(m.requestRetry.Load()), time.Duration(m.maxRetryInterval.Load())
+}
+
+func (m *Manager) closestCooldownWait(providers []string, model string) (time.Duration, bool) {
+	if m == nil || len(providers) == 0 {
+		return 0, false
+	}
+	now := time.Now()
+	providerSet := make(map[string]struct{}, len(providers))
+	for i := range providers {
+		key := strings.TrimSpace(strings.ToLower(providers[i]))
+		if key == "" {
+			continue
+		}
+		providerSet[key] = struct{}{}
+	}
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+	var (
+		found   bool
+		minWait time.Duration
+	)
+	for _, auth := range m.auths {
+		if auth == nil {
+			continue
+		}
+		providerKey := strings.TrimSpace(strings.ToLower(auth.Provider))
+		if _, ok := providerSet[providerKey]; !ok {
+			continue
+		}
+		blocked, reason, next := isAuthBlockedForModel(auth, model, now)
+		if !blocked || next.IsZero() || reason == blockReasonDisabled {
+			continue
+		}
+		wait := next.Sub(now)
+		if wait < 0 {
+			continue
+		}
+		if !found || wait < minWait {
+			minWait = wait
+			found = true
+		}
+	}
+	return minWait, found
+}
+
+func (m *Manager) shouldRetryAfterError(err error, attempt, maxAttempts int, providers []string, model string, maxWait time.Duration) (time.Duration, bool) {
+	if err == nil || attempt >= maxAttempts-1 {
+		return 0, false
+	}
+	if maxWait <= 0 {
+		return 0, false
+	}
+	if status := statusCodeFromError(err); status == http.StatusOK {
+		return 0, false
+	}
+	wait, found := m.closestCooldownWait(providers, model)
+	if !found || wait > maxWait {
+		return 0, false
+	}
+	return wait, true
+}
+
+func waitForCooldown(ctx context.Context, wait time.Duration) error {
+	if wait <= 0 {
+		return nil
+	}
+	timer := time.NewTimer(wait)
+	defer timer.Stop()
+	select {
+	case <-ctx.Done():
+		return ctx.Err()
+	case <-timer.C:
+		return nil
+	}
+}
+
+func (m *Manager) executeProvidersOnce(ctx context.Context, providers []string, fn func(context.Context, string) (cliproxyexecutor.Response, error)) (cliproxyexecutor.Response, error) {
+	if len(providers) == 0 {
+		return cliproxyexecutor.Response{}, &Error{Code: "provider_not_found", Message: "no provider supplied"}
+	}
+	var lastErr error
+	for _, provider := range providers {
+		resp, errExec := fn(ctx, provider)
+		if errExec == nil {
+			return resp, nil
+		}
+		lastErr = errExec
+	}
+	if lastErr != nil {
+		return cliproxyexecutor.Response{}, lastErr
+	}
+	return cliproxyexecutor.Response{}, &Error{Code: "auth_not_found", Message: "no auth available"}
+}
+
+func (m *Manager) executeStreamProvidersOnce(ctx context.Context, providers []string, fn func(context.Context, string) (<-chan cliproxyexecutor.StreamChunk, error)) (<-chan cliproxyexecutor.StreamChunk, error) {
+	if len(providers) == 0 {
+		return nil, &Error{Code: "provider_not_found", Message: "no provider supplied"}
+	}
+	var lastErr error
+	for _, provider := range providers {
+		chunks, errExec := fn(ctx, provider)
+		if errExec == nil {
+			return chunks, nil
+		}
+		lastErr = errExec
+	}
+	if lastErr != nil {
+		return nil, lastErr
+	}
+	return nil, &Error{Code: "auth_not_found", Message: "no auth available"}
+}
+
 // MarkResult records an execution result and notifies hooks.
 func (m *Manager) MarkResult(ctx context.Context, result Result) {
 	if result.AuthID == "" {
@@ -762,6 +947,20 @@ func cloneError(err *Error) *Error {
 	}
 }

+func statusCodeFromError(err error) int {
+	if err == nil {
+		return 0
+	}
+	type statusCoder interface {
+		StatusCode() int
+	}
+	var sc statusCoder
+	if errors.As(err, &sc) && sc != nil {
+		return sc.StatusCode()
+	}
+	return 0
+}
+
 func retryAfterFromError(err error) *time.Duration {
 	if err == nil {
 		return nil
--- a/sdk/cliproxy/service.go
+++ b/sdk/cliproxy/service.go
@@ -281,6 +281,14 @@ func (s *Service) applyCoreAuthRemoval(ctx context.Context, id string) {
 	}
 }

+func (s *Service) applyRetryConfig(cfg *config.Config) {
+	if s == nil || s.coreManager == nil || cfg == nil {
+		return
+	}
+	maxInterval := time.Duration(cfg.MaxRetryInterval) * time.Second
+	s.coreManager.SetRetryConfig(cfg.RequestRetry, maxInterval)
+}
+
 func openAICompatInfoFromAuth(a *coreauth.Auth) (providerKey string, compatName string, ok bool) {
 	if a == nil {
 		return "", "", false
@@ -394,6 +402,8 @@ func (s *Service) Run(ctx context.Context) error {
 		return err
 	}

+	s.applyRetryConfig(s.cfg)
+
 	if s.coreManager != nil {
 		if errLoad := s.coreManager.Load(ctx); errLoad != nil {
 			log.Warnf("failed to load auth store: %v", errLoad)
@@ -476,6 +486,7 @@ func (s *Service) Run(ctx context.Context) error {
 		if newCfg == nil {
 			return
 		}
+		s.applyRetryConfig(newCfg)
 		if s.server != nil {
 			s.server.UpdateClients(newCfg)
 		}
Author	SHA1	Message	Date
Luis Pater	72c7ef7647	fix(translator): handle non-JSON output parsing for OpenAI function responses Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Updated `antigravity_openai_request.go` to process non-JSON outputs gracefully by verifying and distinguishing between JSON and plain string formats. - Ensured proper assignment of parsed or raw response to `functionResponse`.	2025-11-27 16:18:49 +08:00
Luis Pater	d2e4639b2a	feat(registry): add context length and update max tokens for Claude model configurations - Added `ContextLength` field with a value of 200,000 to all applicable Claude model definitions. - Standardized `MaxCompletionTokens` values across models for consistency and alignment.	2025-11-27 16:13:25 +08:00
Luis Pater	08321223c4	Merge pull request #340 from nestharus/fix/339-thinking-openai-gemini-compat fix(thinking): resolve OpenAI/Gemini compatibility for thinking model…	2025-11-27 16:03:24 +08:00
Luis Pater	7e30157590	Fixed: #354 fix(translator): add support for "xhigh" reasoning effort in OpenAI responses - Updated handling in `openai_openai-responses_request.go` to include the new "xhigh" reasoning effort level.	2025-11-27 15:59:15 +08:00
nestharus	e73cdf5cff	fix(claude): ensure max_tokens exceeds thinking budget for thinking models Fixes an issue where Claude thinking models would return 400 errors when the thinking.budget_tokens was greater than or equal to max_tokens. Changes: - Add MaxCompletionTokens: 128000 to all Claude thinking model definitions - Add ensureMaxTokensForThinking() function in claude_executor.go that: - Checks if thinking is enabled with a budget_tokens value - Looks up the model's MaxCompletionTokens from the registry - Ensures max_tokens is set to at least the model's MaxCompletionTokens - Falls back to budget_tokens + 4000 buffer if registry lookup fails This ensures Anthropic API constraint (max_tokens > thinking.budget_tokens) is always satisfied when using extended thinking features. Fixes: #339 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>	2025-11-26 22:31:05 -08:00
Luis Pater	39621a0340	fix(translator): normalize function calls and outputs for consistent input processing Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Implemented logic to pair consecutive function calls and their outputs, ensuring proper sequencing for processing. - Adjusted `gemini_openai-responses_request.go` to normalize message structures and maintain expected flow.	2025-11-27 10:25:45 +08:00
Luis Pater	346b663079	fix(translator): handle non-JSON output gracefully in function call outputs - Updated handling of `output` in `gemini_openai-responses_request.go` to use `.Str` instead of `.Raw` when parsing non-JSON string outputs. - Added checks to distinguish between JSON and non-JSON `output` types for accurate `functionResponse` construction.	2025-11-27 09:40:00 +08:00
Luis Pater	0bcae68c6c	fix(translator): preserve raw JSON encoding in function call outputs Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Updated handling of `output` in `gemini_openai-responses_request.go` to use `.Raw` instead of `.String` for preserving original JSON encoding. - Ensured proper setting of raw JSON output when constructing `functionResponse`.	2025-11-27 08:26:53 +08:00
Luis Pater	c8cee547fd	fix(translator): ensure partial content is retained while skipping encrypted thoughtSignature Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Updated handling of `thoughtSignature` across all translator modules to retain other content payloads if present. - Adjusted logic for `thought_signature` and `inline_data` keys for consistent processing.	2025-11-27 00:52:17 +08:00
Luis Pater	36755421fe	Merge pull request #343 from router-for-me/misc style(amp): tidy whitespace in proxy module and tests	2025-11-26 19:03:07 +08:00
hkfires	6c17dbc4da	style(amp): tidy whitespace in proxy module and tests	2025-11-26 18:57:26 +08:00
Luis Pater	ee6429cc75	feat(registry): add Gemini 3 Pro Image Preview model and remove Claude Sonnet 4.5 Thinking Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Added new `Gemini 3 Pro Image Preview` model with detailed metadata and configuration. - Removed outdated `Claude Sonnet 4.5 Thinking` model definition for cleanup and relevance.	2025-11-26 18:22:40 +08:00
Luis Pater	a4a26d978e	Fixed: #339 Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details feat(handlers, executor): add Gemini 3 Pro Preview support and refine Claude system instructions - Added support for the new "Gemini 3 Pro Preview" action in Gemini handlers, including detailed metadata and configuration. - Removed redundant `cache_control` field from Claude system instructions for cleaner payload structure.	2025-11-26 11:42:57 +08:00
Luis Pater	ed9f6e897e	Fixed: #337 Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details fix(executor): replace redundant commented code with `checkSystemInstructions` helper - Replaced commented-out `sjson.SetRawBytes` lines with the new `checkSystemInstructions` function. - Centralized system instruction handling for better code clarity and reuse. - Ensured consistent logic for managing `system` field across Claude executor flows.	2025-11-26 08:27:48 +08:00
Luis Pater	9c1e3c0687	Merge pull request #334 from nestharus/feat/claude-thinking-and-beta-headers Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details feat(claude): add thinking model variants and beta headers support	2025-11-26 02:17:02 +08:00
Luis Pater	2e5681ea32	Merge branch 'dev' into feat/claude-thinking-and-beta-headers	2025-11-26 02:16:40 +08:00
Luis Pater	52c17f03a5	fix(executor): comment out redundant code for setting Claude system instructions Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Commented out multiple instances of `sjson.SetRawBytes` for setting `system` key to Claude instructions as they are redundant. - Code cleanup to improve clarity and maintainability without affecting functionality.	2025-11-26 02:06:16 +08:00
nestharus	d0e694d4ed	feat(claude): add thinking model variants and beta headers support - Add Claude thinking model definitions (sonnet-4-5-thinking, opus-4-5-thinking variants) - Add Thinking support for antigravity models with -thinking suffix - Add injectThinkingConfig() for automatic thinking budget based on model suffix - Add resolveUpstreamModel() mappings for thinking variants to actual Claude models - Add extractAndRemoveBetas() to convert betas array to anthropic-beta header - Update applyClaudeHeaders() to merge custom betas from request body Closes #324	2025-11-25 03:33:05 -08:00
Luis Pater	506f1117dd	fix(handlers): refactor API response capture to append data safely Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Introduced `appendAPIResponse` helper to preserve and append data to existing API responses. - Ensured newline inclusion when appending, if necessary. - Improved `nil` and data type checks for response handling. - Updated middleware to skip request logging for `GET` requests.	2025-11-25 11:37:02 +08:00
Luis Pater	113db3c5bf	fix(executor): update antigravity executor to enhance model metadata handling Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Added additional metadata fields (`Name`, `Description`, `DisplayName`, `Version`) to `ModelInfo` struct initialization for better model representation. - Removed unnecessary whitespace in the code.	2025-11-25 09:19:01 +08:00
Luis Pater	1aa0b6cd11	Merge pull request #322 from ben-vargas/feat-claude-opus-4-5 Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details feat(registry): add Claude Opus 4.5 model definition	2025-11-25 08:38:06 +08:00
Ben Vargas	0895533400	fix(registry): correct Claude Opus 4.5 created timestamp Update epoch from 1730419200 (2024-11-01) to 1761955200 (2025-11-01).	2025-11-24 12:27:23 -07:00
Ben Vargas	43f007c234	feat(registry): add Claude Opus 4.5 model definition Add support for claude-opus-4-5-20251101 with 200K context window and 64K max output tokens.	2025-11-24 12:26:39 -07:00
Luis Pater	0ceee56d99	Merge pull request #318 from router-for-me/log Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details feat(logs): add limit query param to cap returned logs	2025-11-24 20:35:28 +08:00
hkfires	943a8c74df	feat(logs): add limit query param to cap returned logs	2025-11-24 19:59:24 +08:00
Luis Pater	0a47b452e9	fix(translator): add conditional check for key renaming in Gemini tools Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Ensured `functionDeclarations` key renaming only occurs if the key exists in Gemini tools processing. - Prevented unnecessary JSON reassignment when the target key is absent.	2025-11-24 17:15:43 +08:00
Luis Pater	261f08a82a	fix(translator): adjust key renaming logic in Gemini request processing - Fixed parameter key renaming to correctly handle `functionDeclarations` and `parametersJsonSchema` in Gemini tools. - Resolved potential overwriting issue by reassigning JSON strings after each key rename.	2025-11-24 17:12:04 +08:00
Luis Pater	d114d8d0bd	feat(config): add TLS support for HTTPS server configuration Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Introduced `TLSConfig` to support HTTPS configurations, including enabling TLS, specifying certificate and key files. - Updated HTTP server logic to handle HTTPS mode when TLS is enabled. - Enhanced `config.example.yaml` with TLS settings example. - Adjusted internal URL generation to respect protocol based on TLS state.	2025-11-24 10:41:29 +08:00
Luis Pater	bb9955e461	fix(auth): resolve index reassignment issue during auth management - Fixed improper handling of `indexAssigned` and `Index` during auth reassignment. - Ensured `EnsureIndex` is invoked after validating existing auth entries.	2025-11-24 10:10:09 +08:00
Luis Pater	7063a176f4	#293 feat(retry): add configurable retry logic with cooldown support - Introduced `max-retry-interval` configuration for cooldown durations between retries. - Added `SetRetryConfig` in `Manager` to handle retry attempts and cooldown intervals. - Enhanced provider execution logic to include retry attempts, cooldown management, and dynamic wait periods. - Updated API endpoints and YAML configuration to support `max-retry-interval`.	2025-11-24 09:55:15 +08:00
Luis Pater	e3082887a6	feat(logging, middleware): add error-based logging support and error log management Some checks failed docker-image / docker (push) Has been cancelled Details goreleaser / goreleaser (push) Has been cancelled Details - Introduced `logOnErrorOnly` mode to enable logging only for error responses when request logging is disabled. - Added endpoints to list and download error logs (`/request-error-logs`). - Implemented error log file cleanup to retain only the newest 10 logs. - Refactored `ResponseWriterWrapper` to support forced logging for error responses. - Enhanced middleware to capture data for upstream error persistence. - Improved log file naming and error log filename generation.	2025-11-23 22:41:57 +08:00
Luis Pater	ddb0c0ec1c	fix(translator): reintroduce `thoughtSignature` bypass logic for model parts - Restored `thoughtSignature` validator bypass for model-specific parts in Gemini content processing. - Removed redundant logic from the `executor` for cleaner handling.	2025-11-23 20:52:23 +08:00