From 7061cd60582da0ba06ac0bce907bc529f93c062e Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 26 Oct 2025 19:35:22 +0800 Subject: [PATCH 1/4] fix(gemini): map responseModalities to uppercase IMAGE/TEXT --- internal/runtime/executor/gemini_cli_executor.go | 2 +- .../openai/chat-completions/gemini-cli_openai_request.go | 6 +++--- .../gemini/openai/chat-completions/gemini_openai_request.go | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/runtime/executor/gemini_cli_executor.go b/internal/runtime/executor/gemini_cli_executor.go index c59d4f34..3d7a539d 100644 --- a/internal/runtime/executor/gemini_cli_executor.go +++ b/internal/runtime/executor/gemini_cli_executor.go @@ -703,7 +703,7 @@ func fixGeminiCLIImageAspectRatio(modelName string, rawJSON []byte) []byte { } rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.contents.0.parts", []byte(newPartsJson)) - rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.generationConfig.responseModalities", []byte(`["Image", "Text"]`)) + rawJSON, _ = sjson.SetRawBytes(rawJSON, "request.generationConfig.responseModalities", []byte(`["IMAGE", "TEXT"]`)) } } rawJSON, _ = sjson.DeleteBytes(rawJSON, "request.generationConfig.imageConfig") diff --git a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go index a7d7002d..28163193 100644 --- a/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go +++ b/internal/translator/gemini-cli/openai/chat-completions/gemini-cli_openai_request.go @@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGeminiCLI(modelName string, inputRawJSON []byte, _ bo } // Map OpenAI modalities -> Gemini CLI request.generationConfig.responseModalities - // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + // e.g. 
"modalities": ["image", "text"] -> ["IMAGE", "TEXT"] if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { var responseMods []string for _, m := range mods.Array() { switch strings.ToLower(m.String()) { case "text": - responseMods = append(responseMods, "Text") + responseMods = append(responseMods, "TEXT") case "image": - responseMods = append(responseMods, "Image") + responseMods = append(responseMods, "IMAGE") } } if len(responseMods) > 0 { diff --git a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go index 98de3195..44cad7d2 100644 --- a/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go +++ b/internal/translator/gemini/openai/chat-completions/gemini_openai_request.go @@ -66,15 +66,15 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool) } // Map OpenAI modalities -> Gemini generationConfig.responseModalities - // e.g. "modalities": ["image", "text"] -> ["Image", "Text"] + // e.g. 
"modalities": ["image", "text"] -> ["IMAGE", "TEXT"] if mods := gjson.GetBytes(rawJSON, "modalities"); mods.Exists() && mods.IsArray() { var responseMods []string for _, m := range mods.Array() { switch strings.ToLower(m.String()) { case "text": - responseMods = append(responseMods, "Text") + responseMods = append(responseMods, "TEXT") case "image": - responseMods = append(responseMods, "Image") + responseMods = append(responseMods, "IMAGE") } } if len(responseMods) > 0 { From f3f31274e82e642e730a3c60a3b89fd62a8e47ed Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 26 Oct 2025 20:01:46 +0800 Subject: [PATCH 2/4] refactor(wsrelay): rename RoundTrip to NonStream --- internal/runtime/executor/aistudio_executor.go | 4 ++-- internal/wsrelay/http.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index de90c63a..1765cd0e 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -72,7 +72,7 @@ func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, AuthValue: authValue, }) - wsResp, err := e.relay.RoundTrip(ctx, e.provider, wsReq) + wsResp, err := e.relay.NonStream(ctx, e.provider, wsReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) return resp, err @@ -220,7 +220,7 @@ func (e *AistudioExecutor) CountTokens(ctx context.Context, auth *cliproxyauth.A AuthType: authType, AuthValue: authValue, }) - resp, err := e.relay.RoundTrip(ctx, e.provider, wsReq) + resp, err := e.relay.NonStream(ctx, e.provider, wsReq) if err != nil { recordAPIResponseError(ctx, e.cfg, err) return cliproxyexecutor.Response{}, err diff --git a/internal/wsrelay/http.go b/internal/wsrelay/http.go index f34a61ca..52ea2a1d 100644 --- a/internal/wsrelay/http.go +++ b/internal/wsrelay/http.go @@ -35,8 +35,8 @@ type StreamEvent struct { Err error } -// 
RoundTrip executes a non-streaming HTTP request using the websocket provider. -func (m *Manager) RoundTrip(ctx context.Context, provider string, req *HTTPRequest) (*HTTPResponse, error) { +// NonStream executes a non-streaming HTTP request using the websocket provider. +func (m *Manager) NonStream(ctx context.Context, provider string, req *HTTPRequest) (*HTTPResponse, error) { if req == nil { return nil, fmt.Errorf("wsrelay: request is nil") } From 7f266aa19e762ff0470b4ab58e8dc5fa2dc17a14 Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 26 Oct 2025 20:21:45 +0800 Subject: [PATCH 3/4] fix(aistudio): ensure colon-spaced JSON in responses --- .../runtime/executor/aistudio_executor.go | 54 +++++++++++++++++-- 1 file changed, 51 insertions(+), 3 deletions(-) diff --git a/internal/runtime/executor/aistudio_executor.go b/internal/runtime/executor/aistudio_executor.go index 1765cd0e..4e568cf6 100644 --- a/internal/runtime/executor/aistudio_executor.go +++ b/internal/runtime/executor/aistudio_executor.go @@ -3,6 +3,7 @@ package executor import ( "bytes" "context" + "encoding/json" "fmt" "net/http" "net/url" @@ -87,7 +88,7 @@ func (e *AistudioExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, reporter.publish(ctx, parseGeminiUsage(wsResp.Body)) var param any out := sdktranslator.TranslateNonStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), bytes.Clone(translatedReq), bytes.Clone(wsResp.Body), &param) - resp = cliproxyexecutor.Response{Payload: []byte(out)} + resp = cliproxyexecutor.Response{Payload: ensureColonSpacedJSON([]byte(out))} return resp, nil } @@ -156,7 +157,7 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth } lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), translatedReq, bytes.Clone(filtered), &param) for i := range lines { - out <-
cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} + out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))} } break } @@ -172,7 +173,7 @@ func (e *AistudioExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth } lines := sdktranslator.TranslateStream(ctx, body.toFormat, opts.SourceFormat, req.Model, bytes.Clone(opts.OriginalRequest), translatedReq, bytes.Clone(event.Payload), &param) for i := range lines { - out <- cliproxyexecutor.StreamChunk{Payload: []byte(lines[i])} + out <- cliproxyexecutor.StreamChunk{Payload: ensureColonSpacedJSON([]byte(lines[i]))} } reporter.publish(ctx, parseGeminiUsage(event.Payload)) return @@ -346,3 +347,50 @@ func stripUsageMetadataFromJSON(rawJSON []byte) ([]byte, bool) { } return cleaned, true } + +// ensureColonSpacedJSON normalizes JSON objects so that colons are followed by a single space while +// keeping the payload otherwise compact. Non-JSON inputs are returned unchanged. +func ensureColonSpacedJSON(payload []byte) []byte { + trimmed := bytes.TrimSpace(payload) + if len(trimmed) == 0 { + return payload + } + + var decoded any + if err := json.Unmarshal(trimmed, &decoded); err != nil { + return payload + } + + indented, err := json.MarshalIndent(decoded, "", " ") + if err != nil { + return payload + } + + compacted := make([]byte, 0, len(indented)) + inString := false + skipSpace := false + + for i := 0; i < len(indented); i++ { + ch := indented[i] + if ch == '"' && (i == 0 || indented[i-1] != '\\') { + inString = !inString + } + + if !inString { + if ch == '\n' || ch == '\r' { + skipSpace = true + continue + } + if skipSpace { + if ch == ' ' || ch == '\t' { + continue + } + skipSpace = false + } + } + + compacted = append(compacted, ch) + } + + return compacted +} From e370f86f636f5500f4bf3f4e79ff5a2ff5700caa Mon Sep 17 00:00:00 2001 From: hkfires <10558748+hkfires@users.noreply.github.com> Date: Sun, 26 Oct 2025 21:26:15 +0800 Subject: [PATCH 4/4] fix(gemini-executor): uppercase
responseModalities --- internal/runtime/executor/gemini_executor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/runtime/executor/gemini_executor.go b/internal/runtime/executor/gemini_executor.go index 180f07fb..e3008cef 100644 --- a/internal/runtime/executor/gemini_executor.go +++ b/internal/runtime/executor/gemini_executor.go @@ -494,7 +494,7 @@ func fixGeminiImageAspectRatio(modelName string, rawJSON []byte) []byte { } rawJSON, _ = sjson.SetRawBytes(rawJSON, "contents.0.parts", []byte(newPartsJson)) - rawJSON, _ = sjson.SetRawBytes(rawJSON, "generationConfig.responseModalities", []byte(`["Image", "Text"]`)) + rawJSON, _ = sjson.SetRawBytes(rawJSON, "generationConfig.responseModalities", []byte(`["IMAGE", "TEXT"]`)) } } rawJSON, _ = sjson.DeleteBytes(rawJSON, "generationConfig.imageConfig")