Compare commits

...

14 Commits

Author SHA1 Message Date
Luis Pater
83a1fa618d Merge pull request #52 from router-for-me/gemini-web
Some checks failed
docker-image / docker (push) Has been cancelled
goreleaser / goreleaser (push) Has been cancelled
Add support for image generation with Gemini models through the OpenAI chat completions translator.
2025-09-20 20:12:51 +08:00
hkfires
9253bdbf77 feat(provider): Introduce dedicated provider type for Gemini-Web 2025-09-20 19:47:58 +08:00
hkfires
41effa5aeb feat(gemini-web): Add support for image generation with Gemini models through the OpenAI chat completions translator. 2025-09-20 19:34:53 +08:00
Luis Pater
b07ed71de2 Merge pull request #51 from router-for-me/gemini-web
Some checks failed
docker-image / docker (push) Has been cancelled
goreleaser / goreleaser (push) Has been cancelled
feat(gemini-web): Add support for real Nano Banana model
2025-09-20 14:26:03 +08:00
hkfires
deaa64b080 feat(gemini-web): Add support for real Nano Banana model 2025-09-20 13:35:27 +08:00
Luis Pater
d42384cdb7 Merge branch 'dev'
Some checks failed
docker-image / docker (push) Has been cancelled
goreleaser / goreleaser (push) Has been cancelled
2025-09-20 01:38:36 +08:00
Luis Pater
24f243a1bc feat: add support for Gemini 2.5 Flash image preview alias
- Introduced `gemini-2.5-flash-image-preview` alias in `GeminiWebAliasMap` for enhanced model handling.
- Added `gemini-2.5-flash-image-preview` as a new model variant with custom ID, name, display name, and description.
2025-09-20 01:37:42 +08:00
Luis Pater
67a4fe703c Merge branch 'dev'
Some checks failed
docker-image / docker (push) Has been cancelled
goreleaser / goreleaser (push) Has been cancelled
2025-09-20 00:33:36 +08:00
Luis Pater
c16a989287 Merge pull request #50 from router-for-me/auth-dev 2025-09-20 00:31:08 +08:00
Luis Pater
bc376ad419 Merge pull request #49 from router-for-me/gemini-web 2025-09-20 00:28:46 +08:00
hkfires
aba719f5fe refactor(auth): Centralize auth file reading with snapshot preference
The logic for reading authentication files, which includes retries and a preference for cookie snapshot files, was previously implemented locally within the `watcher` package. This was done to handle potential file locks during writes.

This change moves this functionality into a shared `ReadAuthFileWithRetry` function in the `util` package to promote code reuse and consistency.

The `watcher` package is updated to use this new centralized function. Additionally, the initial token loading in the `run` command now also uses this logic, making it more resilient to file access issues and consistent with the watcher's behavior.
2025-09-20 00:14:26 +08:00
hkfires
1d7abc95b8 fix(gemini-web): ensure colon spacing in JSON output for compatibility 2025-09-19 23:32:52 +08:00
hkfires
1dccdb7ff2 Merge branch 'cookie_snapshot' into dev 2025-09-19 12:40:59 +08:00
hkfires
395164e2d4 feat(log): Add separator when saving client credentials 2025-09-19 12:36:17 +08:00
13 changed files with 269 additions and 57 deletions

View File

@@ -33,6 +33,10 @@ type GeminiClient struct {
accountLabel string
}
var NanoBananaModel = map[string]struct{}{
"gemini-2.5-flash-image-preview": {},
}
// NewGeminiClient creates a client. Pass empty strings to auto-detect via browser cookies (not implemented in Go port).
func NewGeminiClient(secure1psid string, secure1psidts string, proxy string, opts ...func(*GeminiClient)) *GeminiClient {
c := &GeminiClient{
@@ -239,6 +243,14 @@ func (c *GeminiClient) GenerateContent(prompt string, files []string, model Mode
}
}
func ensureAnyLen(slice []any, index int) []any {
if index < len(slice) {
return slice
}
gap := index + 1 - len(slice)
return append(slice, make([]any, gap)...)
}
func (c *GeminiClient) generateOnce(prompt string, files []string, model Model, gem *Gem, chat *ChatSession) (ModelOutput, error) {
var empty ModelOutput
// Build f.req
@@ -266,6 +278,14 @@ func (c *GeminiClient) generateOnce(prompt string, files []string, model Model,
}
inner := []any{item0, nil, item2}
requestedModel := strings.ToLower(model.Name)
if chat != nil && chat.RequestedModel() != "" {
requestedModel = chat.RequestedModel()
}
if _, ok := NanoBananaModel[requestedModel]; ok {
inner = ensureAnyLen(inner, 49)
inner[49] = 14
}
if gem != nil {
// pad with 16 nils then gem ID
for i := 0; i < 16; i++ {
@@ -674,16 +694,17 @@ func truncateForLog(s string, n int) string {
// StartChat returns a ChatSession attached to the client
func (c *GeminiClient) StartChat(model Model, gem *Gem, metadata []string) *ChatSession {
return &ChatSession{client: c, metadata: normalizeMeta(metadata), model: model, gem: gem}
return &ChatSession{client: c, metadata: normalizeMeta(metadata), model: model, gem: gem, requestedModel: strings.ToLower(model.Name)}
}
// ChatSession holds conversation metadata
type ChatSession struct {
client *GeminiClient
metadata []string // cid, rid, rcid
lastOutput *ModelOutput
model Model
gem *Gem
client *GeminiClient
metadata []string // cid, rid, rcid
lastOutput *ModelOutput
model Model
gem *Gem
requestedModel string
}
func (cs *ChatSession) String() string {
@@ -710,6 +731,10 @@ func normalizeMeta(v []string) []string {
func (cs *ChatSession) Metadata() []string { return cs.metadata }
func (cs *ChatSession) SetMetadata(v []string) { cs.metadata = normalizeMeta(v) }
func (cs *ChatSession) RequestedModel() string { return cs.requestedModel }
func (cs *ChatSession) SetRequestedModel(name string) {
cs.requestedModel = strings.ToLower(name)
}
func (cs *ChatSession) CID() string {
if len(cs.metadata) > 0 {
return cs.metadata[0]

View File

@@ -1,6 +1,7 @@
package geminiwebapi
import (
"bytes"
"encoding/json"
"fmt"
"math"
@@ -137,5 +138,41 @@ func ConvertOutputToGemini(output *ModelOutput, modelName string, promptText str
if err != nil {
return nil, fmt.Errorf("failed to marshal gemini response: %w", err)
}
return b, nil
return ensureColonSpacing(b), nil
}
// ensureColonSpacing inserts a single space after JSON key-value colons while
// leaving string content untouched. This matches the relaxed formatting used by
// Gemini responses and keeps downstream text-processing tools compatible with
// the proxy output.
func ensureColonSpacing(b []byte) []byte {
if len(b) == 0 {
return b
}
var out bytes.Buffer
out.Grow(len(b) + len(b)/8)
inString := false
escaped := false
for i := 0; i < len(b); i++ {
ch := b[i]
out.WriteByte(ch)
if escaped {
escaped = false
continue
}
switch ch {
case '\\':
escaped = true
case '"':
inString = !inString
case ':':
if !inString && i+1 < len(b) {
next := b[i+1]
if next != ' ' && next != '\n' && next != '\r' && next != '\t' {
out.WriteByte(' ')
}
}
}
}
return out.Bytes()
}

View File

@@ -116,6 +116,8 @@ func EnsureGeminiWebAliasMap() {
for _, m := range registry.GetGeminiModels() {
if m.ID == "gemini-2.5-flash-lite" {
continue
} else if m.ID == "gemini-2.5-flash" {
GeminiWebAliasMap["gemini-2.5-flash-image-preview"] = "gemini-2.5-flash"
}
alias := AliasFromModelID(m.ID)
GeminiWebAliasMap[strings.ToLower(alias)] = strings.ToLower(m.ID)
@@ -130,6 +132,13 @@ func GetGeminiWebAliasedModels() []*registry.ModelInfo {
for _, m := range registry.GetGeminiModels() {
if m.ID == "gemini-2.5-flash-lite" {
continue
} else if m.ID == "gemini-2.5-flash" {
cpy := *m
cpy.ID = "gemini-2.5-flash-image-preview"
cpy.Name = "gemini-2.5-flash-image-preview"
cpy.DisplayName = "Nano Banana"
cpy.Description = "Gemini 2.5 Flash Preview Image"
aliased = append(aliased, &cpy)
}
cpy := *m
cpy.ID = AliasFromModelID(m.ID)

View File

@@ -207,7 +207,7 @@ func (c *GeminiWebClient) registerModelsOnce() {
if c.modelsRegistered {
return
}
c.RegisterModels(GEMINI, geminiWeb.GetGeminiWebAliasedModels())
c.RegisterModels(GEMINIWEB, geminiWeb.GetGeminiWebAliasedModels())
c.modelsRegistered = true
}
@@ -219,8 +219,8 @@ func (c *GeminiWebClient) EnsureRegistered() {
}
}
func (c *GeminiWebClient) Type() string { return GEMINI }
func (c *GeminiWebClient) Provider() string { return GEMINI }
func (c *GeminiWebClient) Type() string { return GEMINIWEB }
func (c *GeminiWebClient) Provider() string { return GEMINIWEB }
func (c *GeminiWebClient) CanProvideModel(modelName string) bool {
geminiWeb.EnsureGeminiWebAliasMap()
_, ok := geminiWeb.GeminiWebAliasMap[strings.ToLower(modelName)]
@@ -394,6 +394,7 @@ func (c *GeminiWebClient) prepareChat(ctx context.Context, modelName string, raw
c.appendUpstreamRequestLog(ctx, modelName, res.tagged, true, res.prompt, len(uploadedFiles), res.reuse, res.metaLen)
gem := c.getConfiguredGem()
res.chat = c.gwc.StartChat(model, gem, meta)
res.chat.SetRequestedModel(modelName)
return res, nil
}

View File

@@ -26,6 +26,7 @@ import (
"github.com/luispater/CLIProxyAPI/v5/internal/config"
"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
"github.com/luispater/CLIProxyAPI/v5/internal/misc"
"github.com/luispater/CLIProxyAPI/v5/internal/util"
"github.com/luispater/CLIProxyAPI/v5/internal/watcher"
log "github.com/sirupsen/logrus"
"github.com/tidwall/gjson"
@@ -76,7 +77,7 @@ func StartService(cfg *config.Config, configPath string) {
if !info.IsDir() && strings.HasSuffix(info.Name(), ".json") {
misc.LogCredentialSeparator()
log.Debugf("Loading token from: %s", path)
data, errReadFile := os.ReadFile(path)
data, errReadFile := util.ReadAuthFilePreferSnapshot(path)
if errReadFile != nil {
return errReadFile
}
@@ -344,6 +345,7 @@ func StartService(cfg *config.Config, configPath string) {
}
activeClientsMu.RUnlock()
for _, c := range snapshot {
misc.LogCredentialSeparator()
// Persist tokens/cookies then unregister/cleanup per client.
_ = c.SaveTokenToFile()
switch u := any(c).(type) {

View File

@@ -3,6 +3,7 @@ package constant
const (
GEMINI = "gemini"
GEMINICLI = "gemini-cli"
GEMINIWEB = "gemini-web"
CODEX = "codex"
CLAUDE = "claude"
OPENAI = "openai"

View File

@@ -0,0 +1,20 @@
package chat_completions
import (
. "github.com/luispater/CLIProxyAPI/v5/internal/constant"
"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
geminiChat "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/chat-completions"
"github.com/luispater/CLIProxyAPI/v5/internal/translator/translator"
)
func init() {
translator.Register(
OPENAI,
GEMINIWEB,
geminiChat.ConvertOpenAIRequestToGemini,
interfaces.TranslateResponse{
Stream: geminiChat.ConvertGeminiResponseToOpenAI,
NonStream: geminiChat.ConvertGeminiResponseToOpenAINonStream,
},
)
}

View File

@@ -0,0 +1,20 @@
package responses
import (
. "github.com/luispater/CLIProxyAPI/v5/internal/constant"
"github.com/luispater/CLIProxyAPI/v5/internal/interfaces"
geminiResponses "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/responses"
"github.com/luispater/CLIProxyAPI/v5/internal/translator/translator"
)
func init() {
translator.Register(
OPENAI_RESPONSE,
GEMINIWEB,
geminiResponses.ConvertOpenAIResponsesRequestToGemini,
interfaces.TranslateResponse{
Stream: geminiResponses.ConvertGeminiResponseToOpenAIResponses,
NonStream: geminiResponses.ConvertGeminiResponseToOpenAIResponsesNonStream,
},
)
}

View File

@@ -170,6 +170,31 @@ func ConvertOpenAIRequestToGemini(modelName string, inputRawJSON []byte, _ bool)
node := []byte(`{"role":"model","parts":[{"text":""}]}`)
node, _ = sjson.SetBytes(node, "parts.0.text", content.String())
out, _ = sjson.SetRawBytes(out, "contents.-1", node)
} else if content.IsArray() {
// Assistant multimodal content (e.g. text + image) -> single model content with parts
node := []byte(`{"role":"model","parts":[]}`)
p := 0
for _, item := range content.Array() {
switch item.Get("type").String() {
case "text":
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".text", item.Get("text").String())
p++
case "image_url":
// If the assistant returned an inline data URL, preserve it for history fidelity.
imageURL := item.Get("image_url.url").String()
if len(imageURL) > 5 { // expect data:...
pieces := strings.SplitN(imageURL[5:], ";", 2)
if len(pieces) == 2 && len(pieces[1]) > 7 {
mime := pieces[0]
data := pieces[1][7:]
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.mime_type", mime)
node, _ = sjson.SetBytes(node, "parts."+itoa(p)+".inlineData.data", data)
p++
}
}
}
}
out, _ = sjson.SetRawBytes(out, "contents.-1", node)
} else if !content.Exists() || content.Type == gjson.Null {
// Tool calls -> single model content with functionCall parts
tcs := m.Get("tool_calls")

View File

@@ -8,6 +8,7 @@ package chat_completions
import (
"bytes"
"context"
"encoding/json"
"fmt"
"time"
@@ -99,6 +100,10 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
partResult := partResults[i]
partTextResult := partResult.Get("text")
functionCallResult := partResult.Get("functionCall")
inlineDataResult := partResult.Get("inlineData")
if !inlineDataResult.Exists() {
inlineDataResult = partResult.Get("inline_data")
}
if partTextResult.Exists() {
// Handle text content, distinguishing between regular content and reasoning/thoughts.
@@ -124,6 +129,34 @@ func ConvertGeminiResponseToOpenAI(_ context.Context, _ string, originalRequestR
}
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.delta.tool_calls.-1", functionCallTemplate)
} else if inlineDataResult.Exists() {
data := inlineDataResult.Get("data").String()
if data == "" {
continue
}
mimeType := inlineDataResult.Get("mimeType").String()
if mimeType == "" {
mimeType = inlineDataResult.Get("mime_type").String()
}
if mimeType == "" {
mimeType = "image/png"
}
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
imagePayload, err := json.Marshal(map[string]any{
"type": "image_url",
"image_url": map[string]string{
"url": imageURL,
},
})
if err != nil {
continue
}
imagesResult := gjson.Get(template, "choices.0.delta.images")
if !imagesResult.Exists() || !imagesResult.IsArray() {
template, _ = sjson.SetRaw(template, "choices.0.delta.images", `[]`)
}
template, _ = sjson.Set(template, "choices.0.delta.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.delta.images.-1", string(imagePayload))
}
}
}
@@ -193,6 +226,10 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
partResult := partsResults[i]
partTextResult := partResult.Get("text")
functionCallResult := partResult.Get("functionCall")
inlineDataResult := partResult.Get("inlineData")
if !inlineDataResult.Exists() {
inlineDataResult = partResult.Get("inline_data")
}
if partTextResult.Exists() {
// Append text content, distinguishing between regular content and reasoning.
@@ -217,9 +254,34 @@ func ConvertGeminiResponseToOpenAINonStream(_ context.Context, _ string, origina
}
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.message.tool_calls.-1", functionCallItemTemplate)
} else {
// If no usable content is found, return an empty string.
return ""
} else if inlineDataResult.Exists() {
data := inlineDataResult.Get("data").String()
if data == "" {
continue
}
mimeType := inlineDataResult.Get("mimeType").String()
if mimeType == "" {
mimeType = inlineDataResult.Get("mime_type").String()
}
if mimeType == "" {
mimeType = "image/png"
}
imageURL := fmt.Sprintf("data:%s;base64,%s", mimeType, data)
imagePayload, err := json.Marshal(map[string]any{
"type": "image_url",
"image_url": map[string]string{
"url": imageURL,
},
})
if err != nil {
continue
}
imagesResult := gjson.Get(template, "choices.0.message.images")
if !imagesResult.Exists() || !imagesResult.IsArray() {
template, _ = sjson.SetRaw(template, "choices.0.message.images", `[]`)
}
template, _ = sjson.Set(template, "choices.0.message.role", "assistant")
template, _ = sjson.SetRaw(template, "choices.0.message.images.-1", string(imagePayload))
}
}
}

View File

@@ -23,6 +23,9 @@ import (
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/chat-completions"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini/openai/responses"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini-web/openai/chat-completions"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/gemini-web/openai/responses"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/openai/claude"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/openai/gemini"
_ "github.com/luispater/CLIProxyAPI/v5/internal/translator/openai/gemini-cli"

View File

@@ -6,6 +6,7 @@ import (
"os"
"path/filepath"
"strings"
"time"
"github.com/luispater/CLIProxyAPI/v5/internal/misc"
)
@@ -93,6 +94,52 @@ func WriteCookieSnapshot(mainPath string, v any) error {
return nil
}
// ReadAuthFilePreferSnapshot returns the first non-empty auth payload preferring snapshots.
func ReadAuthFilePreferSnapshot(path string) ([]byte, error) {
return ReadAuthFileWithRetry(path, 1, 0)
}
// ReadAuthFileWithRetry attempts to read an auth file multiple times and prefers cookie snapshots.
func ReadAuthFileWithRetry(path string, attempts int, delay time.Duration) ([]byte, error) {
if attempts < 1 {
attempts = 1
}
read := func(target string) ([]byte, error) {
var lastErr error
for i := 0; i < attempts; i++ {
data, err := os.ReadFile(target)
if err == nil {
return data, nil
}
lastErr = err
if i < attempts-1 {
time.Sleep(delay)
}
}
return nil, lastErr
}
candidates := []string{
CookieSnapshotPath(path),
path,
}
for idx, candidate := range candidates {
data, err := read(candidate)
if err == nil {
return data, nil
}
if errors.Is(err, os.ErrNotExist) {
if idx < len(candidates)-1 {
continue
}
}
return nil, err
}
return nil, os.ErrNotExist
}
// RemoveCookieSnapshots removes the snapshot file if it exists.
func RemoveCookieSnapshots(mainPath string) {
_ = RemoveFile(CookieSnapshotPath(mainPath))

View File

@@ -9,7 +9,6 @@ import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"errors"
"io/fs"
"net/http"
"os"
@@ -324,7 +323,7 @@ func (w *Watcher) reloadClients() {
// Rebuild auth file hash cache for current clients
w.lastAuthHashes = make(map[string]string, len(newFileClients))
for path := range newFileClients {
if data, err := readAuthFileWithRetry(path, authFileReadMaxAttempts, authFileReadRetryDelay); err == nil && len(data) > 0 {
if data, err := util.ReadAuthFileWithRetry(path, authFileReadMaxAttempts, authFileReadRetryDelay); err == nil && len(data) > 0 {
sum := sha256.Sum256(data)
w.lastAuthHashes[path] = hex.EncodeToString(sum[:])
}
@@ -353,7 +352,7 @@ func (w *Watcher) reloadClients() {
// createClientFromFile creates a single client instance from a given token file path.
func (w *Watcher) createClientFromFile(path string, cfg *config.Config) (interfaces.Client, error) {
data, errReadFile := readAuthFileWithRetry(path, authFileReadMaxAttempts, authFileReadRetryDelay)
data, errReadFile := util.ReadAuthFileWithRetry(path, authFileReadMaxAttempts, authFileReadRetryDelay)
if errReadFile != nil {
return nil, errReadFile
}
@@ -416,48 +415,9 @@ func (w *Watcher) clientsToSlice(clientMap map[string]interfaces.Client) []inter
return s
}
// readAuthFileWithRetry attempts to read the auth file multiple times to work around
// short-lived locks on Windows while token files are being written.
func readAuthFileWithRetry(path string, attempts int, delay time.Duration) ([]byte, error) {
read := func(target string) ([]byte, error) {
var lastErr error
for i := 0; i < attempts; i++ {
data, err := os.ReadFile(target)
if err == nil {
return data, nil
}
lastErr = err
if i < attempts-1 {
time.Sleep(delay)
}
}
return nil, lastErr
}
candidates := []string{
util.CookieSnapshotPath(path),
path,
}
for idx, candidate := range candidates {
data, err := read(candidate)
if err == nil {
return data, nil
}
if errors.Is(err, os.ErrNotExist) {
if idx < len(candidates)-1 {
continue
}
}
return nil, err
}
return nil, os.ErrNotExist
}
// addOrUpdateClient handles the addition or update of a single client.
func (w *Watcher) addOrUpdateClient(path string) {
data, errRead := readAuthFileWithRetry(path, authFileReadMaxAttempts, authFileReadRetryDelay)
data, errRead := util.ReadAuthFileWithRetry(path, authFileReadMaxAttempts, authFileReadRetryDelay)
if errRead != nil {
log.Errorf("failed to read auth file %s: %v", filepath.Base(path), errRead)
return