Compare commits

..

5 Commits

Author SHA1 Message Date
Luis Pater
83a1fa618d Merge pull request #52 from router-for-me/gemini-web
Some checks failed
docker-image / docker (push) Has been cancelled
goreleaser / goreleaser (push) Has been cancelled
Add support for image generation with Gemini models through the OpenAI chat completions translator.
2025-09-20 20:12:51 +08:00
hkfires
9253bdbf77 feat(provider): Introduce dedicated provider type for Gemini-Web 2025-09-20 19:47:58 +08:00
hkfires
41effa5aeb feat(gemini-web): Add support for image generation with Gemini models through the OpenAI chat completions translator. 2025-09-20 19:34:53 +08:00
Luis Pater
b07ed71de2 Merge pull request #51 from router-for-me/gemini-web
Some checks failed
docker-image / docker (push) Has been cancelled
goreleaser / goreleaser (push) Has been cancelled
feat(gemini-web): Add support for real Nano Banana model
2025-09-20 14:26:03 +08:00
hkfires
deaa64b080 feat(gemini-web): Add support for real Nano Banana model 2025-09-20 13:35:27 +08:00
8 changed files with 169 additions and 12 deletions

View File

@@ -33,6 +33,10 @@ type GeminiClient struct {
accountLabel string
}
// NanoBananaModel is a set of lower-cased model names, used purely for
// membership checks (the struct{} values carry no data). generateOnce looks up
// the requested model here and, on a hit, pads the request's inner slice to
// index 49 and stores 14 there.
// NOTE(review): the meaning of slot 49 / value 14 is not visible in this file — confirm upstream.
var NanoBananaModel = map[string]struct{}{
"gemini-2.5-flash-image-preview": {},
}
// NewGeminiClient creates a client. Pass empty strings to auto-detect via browser cookies (not implemented in Go port).
func NewGeminiClient(secure1psid string, secure1psidts string, proxy string, opts ...func(*GeminiClient)) *GeminiClient {
c := &GeminiClient{
@@ -239,6 +243,14 @@ func (c *GeminiClient) GenerateContent(prompt string, files []string, model Mode
}
}
// ensureAnyLen guarantees that slice[index] is addressable.
//
// When the slice is already long enough (including any negative index) it is
// returned untouched. Otherwise it is extended with nil entries so that its
// new length is exactly index+1, and the extended slice is returned.
func ensureAnyLen(slice []any, index int) []any {
	if len(slice) > index {
		return slice
	}
	// Number of nil placeholders required to reach position index.
	missing := index + 1 - len(slice)
	padding := make([]any, missing)
	return append(slice, padding...)
}
func (c *GeminiClient) generateOnce(prompt string, files []string, model Model, gem *Gem, chat *ChatSession) (ModelOutput, error) {
var empty ModelOutput
// Build f.req
@@ -266,6 +278,14 @@ func (c *GeminiClient) generateOnce(prompt string, files []string, model Model,
}
inner := []any{item0, nil, item2}
requestedModel := strings.ToLower(model.Name)
if chat != nil && chat.RequestedModel() != "" {
requestedModel = chat.RequestedModel()
}
if _, ok := NanoBananaModel[requestedModel]; ok {
inner = ensureAnyLen(inner, 49)
inner[49] = 14
}
if gem != nil {
// pad with 16 nils then gem ID
for i := 0; i < 16; i++ {
@@ -674,16 +694,17 @@ func truncateForLog(s string, n int) string {
// StartChat returns a ChatSession attached to the client, carrying the given
// model and gem plus the metadata after it has passed through normalizeMeta.
// NOTE(review): two return statements follow. This looks like a diff rendering
// that kept both the pre-change and the post-change line. Read literally, only
// the first one executes and requestedModel is never initialised — confirm that
// only the second return (which sets requestedModel to the lower-cased
// model.Name) is meant to remain.
func (c *GeminiClient) StartChat(model Model, gem *Gem, metadata []string) *ChatSession {
return &ChatSession{client: c, metadata: normalizeMeta(metadata), model: model, gem: gem}
return &ChatSession{client: c, metadata: normalizeMeta(metadata), model: model, gem: gem, requestedModel: strings.ToLower(model.Name)}
}
// ChatSession holds conversation metadata together with the client, model and
// gem the session was started with.
// NOTE(review): the first five fields are listed twice below. This looks like a
// diff rendering that kept both the old and the re-aligned new field list; a Go
// struct cannot declare the same field name twice — confirm that only the
// second list (ending in requestedModel) is intended.
type ChatSession struct {
client *GeminiClient
metadata []string // cid, rid, rcid
lastOutput *ModelOutput
model Model
gem *Gem
client *GeminiClient
metadata []string // cid, rid, rcid
lastOutput *ModelOutput
model Model
gem *Gem
requestedModel string // lower-cased model name; written by StartChat and SetRequestedModel
}
func (cs *ChatSession) String() string {
@@ -710,6 +731,10 @@ func normalizeMeta(v []string) []string {
// Metadata returns the session's stored metadata slice (cid, rid, rcid).
// The slice is handed back as-is, not copied.
func (cs *ChatSession) Metadata() []string {
	return cs.metadata
}
// SetMetadata replaces the session metadata with v after passing it through
// normalizeMeta.
func (cs *ChatSession) SetMetadata(v []string) {
	normalized := normalizeMeta(v)
	cs.metadata = normalized
}
// RequestedModel returns the model name recorded for this session. The stored
// value is whatever StartChat or SetRequestedModel last wrote.
func (cs *ChatSession) RequestedModel() string {
	return cs.requestedModel
}
// SetRequestedModel records name, converted to lower case, as the model
// requested for this session.
func (cs *ChatSession) SetRequestedModel(name string) {
	lowered := strings.ToLower(name)
	cs.requestedModel = lowered
}
func (cs *ChatSession) CID() string {
if len(cs.metadata) > 0 {
return cs.metadata[0]

View File

@@ -207,7 +207,7 @@ func (c *GeminiWebClient) registerModelsOnce() {
if c.modelsRegistered {
return
}
c.RegisterModels(GEMINI, geminiWeb.GetGeminiWebAliasedModels())
c.RegisterModels(GEMINIWEB, geminiWeb.GetGeminiWebAliasedModels())
c.modelsRegistered = true
}
@@ -219,8 +219,8 @@ func (c *GeminiWebClient) EnsureRegistered() {
}
}
// Type and Provider report the identifier constant associated with this client.
// NOTE(review): each method appears twice below, once returning GEMINI and once
// returning GEMINIWEB. This looks like a diff rendering that kept both the old
// and the new lines; Go does not allow a method to be declared twice — confirm
// that only the GEMINIWEB variants are meant to remain.
func (c *GeminiWebClient) Type() string { return GEMINI }
func (c *GeminiWebClient) Provider() string { return GEMINI }
func (c *GeminiWebClient) Type() string { return GEMINIWEB }
func (c *GeminiWebClient) Provider() string { return GEMINIWEB }
func (c *GeminiWebClient) CanProvideModel(modelName string) bool {
geminiWeb.EnsureGeminiWebAliasMap()
_, ok := geminiWeb.GeminiWebAliasMap[strings.ToLower(modelName)]
@@ -394,6 +394,7 @@ func (c *GeminiWebClient) prepareChat(ctx context.Context, modelName string, raw
c.appendUpstreamRequestLog(ctx, modelName, res.tagged, true, res.prompt, len(uploadedFiles), res.reuse, res.metaLen)
gem := c.getConfiguredGem()
res.chat = c.gwc.StartChat(model, gem, meta)
res.chat.SetRequestedModel(modelName)
return res, nil
}

View File

@@ -3,6 +3,7 @@ package constant
const (
GEMINI = "gemini"
GEMINICLI = "gemini-cli"
GEMINIWEB = "gemini-web"
CODEX = "codex"
CLAUDE = "claude"
OPENAI = "openai"

View File

@@ -0,0 +1,20 @@
package chat_completions
import (
. "github.com/luispater/CLIProxyAPI/v5/internal/constant"
"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
geminiChat "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/chat-completions"
"github.com/luispater/CLIProxyAPI/v5/internal/translator/translator"
)
func init() {
translator.Register(
OPENAI,
GEMINIWEB,
geminiChat.ConvertOpenAIRequestToGemini,
interfaces.TranslateResponse{
Stream: geminiChat.ConvertGeminiResponseToOpenAI,
NonStream: geminiChat.ConvertGeminiResponseToOpenAINonStream,
},
)
}

View File

@@ -0,0 +1,20 @@
package responses
import (
. "github.com/luispater/CLIProxyAPI/v5/internal/constant"
"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
geminiResponses "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/responses"
"github.com/luispater/CLIProxyAPI/v5/internal/translator/translator"
)
func init() {
translator.Register(
OPENAI_RESPONSE,
GEMINIWEB,
geminiResponses.ConvertOpenAIResponsesRequestToGemini,
interfaces.TranslateResponse{
Stream: geminiResponses.ConvertGeminiResponseToOpenAIResponses,
NonStream: geminiResponses.ConvertGeminiResponseToOpenAIResponsesNonStream,
},
)
}

View File

@@ -170,6 +170,31 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
node := []byte(`{"role":"model","parts":[{"text":""}]}`)
node, _ = sjson.SetBytes(node, "parts.0.text", content.String())
out, _ = sjson.SetRawBytes(out, "contents.-1", node)
} else if content.IsArray() {
// Assistant multimodal content (e.g. text + image) -> single model content with parts
node := []byte(`{"role":"model","parts":[]}`)
p := 0
for _, item := range content.Array() {
switch item.Get("type").String() {
case "text":
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
p++
case "image_url":
// If the assistant returned an inline data URL, preserve it for history fidelity.
imageURL := item.Get("image_url.url").String()
if len(imageURL) > 5 { // expect data:...
pieces := strings.SplitN(imageURL[5:], ";", 2)
if len(pieces) == 2 && len(pieces[1]) > 7 {
mime := pieces[0]
data := pieces[1][7:]
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
p++
}
}
}
}
out, _ = sjson.SetRawBytes(out, "contents.-1", node)
} else if !content.Exists() || content.Type == gjson.Null {
// Tool calls -> single model content with functionCall parts
tcs := m.Get("tool_calls")

View File

@@ -8,6 +8,7 @@ package chat_completions
import (
"bytes"
"context"
"encoding/json"
"fmt"
"time"
@@ -99,6 +100,10 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
partResult := partResults[i]
partTextResult := partResult.Get("text")
functionCallResult := partResult.Get("functionCall")
inlineDataResult := partResult.Get("inlineData")
if !inlineDataResult.Exists() {
inlineDataResult = partResult.Get("inline_data")
}
if partTextResult.Exists() {
// Handle text content, distinguishing between regular content and reasoning/thoughts.
@@ -124,6 +129,34 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
}
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate)
} else if inlineDataResult.Exists() {
data := inlineDataResult.Get("data").String()
if data == "" {
continue
}
mimeType := inlineDataResult.Get("mimeType").String()
if mimeType == "" {
mimeType = inlineDataResult.Get("mime_type").String()
}
if mimeType == "" {
mimeType = "image/png"
}
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
imagePayload, err := json.Marshal(map[string]any{
"type": "image_url",
"image_url": map[string]string{
"url": imageURL,
},
})
if err != nil {
continue
}
imagesResult := gjson.Get(template, "choices.0.delta.images")
if !imagesResult.Exists() || !imagesResult.IsArray() {
template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`)
}
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload))
}
}
}
@@ -193,6 +226,10 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
partResult := partsResults[i]
partTextResult := partResult.Get("text")
functionCallResult := partResult.Get("functionCall")
inlineDataResult := partResult.Get("inlineData")
if !inlineDataResult.Exists() {
inlineDataResult = partResult.Get("inline_data")
}
if partTextResult.Exists() {
// Append text content, distinguishing between regular content and reasoning.
@@ -217,9 +254,34 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
}
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls.-1", functionCallItemTemplate)
} else {
// If no usable content is found, return an empty string.
return ""
} else if inlineDataResult.Exists() {
data := inlineDataResult.Get("data").String()
if data == "" {
continue
}
mimeType := inlineDataResult.Get("mimeType").String()
if mimeType == "" {
mimeType = inlineDataResult.Get("mime_type").String()
}
if mimeType == "" {
mimeType = "image/png"
}
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
imagePayload, err := json.Marshal(map[string]any{
"type": "image_url",
"image_url": map[string]string{
"url": imageURL,
},
})
if err != nil {
continue
}
imagesResult := gjson.Get(template, "choices.0.message.images")
if !imagesResult.Exists() || !imagesResult.IsArray() {
template, _ = sjson.SetRaw(template, "choices.0.message.images", `[]`)
}
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", string(imagePayload))
}
}
}

View File

@@ -23,6 +23,9 @@ import (
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/chat-completions"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/responses"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini-web/openai/chat-completions"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini-web/openai/responses"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/openai/claude"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/openai/gemini"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/openai/gemini-cli"