Compare commits

...

2 Commits

Author SHA1 Message Date
Luis Pater
f49a530c1a Refactor client handling and improve error responses
Some checks failed
docker-image / docker (push) Has been cancelled
goreleaser / goreleaser (push) Has been cancelled
- Centralized client retrieval logic with `getClient` function for reduced redundancy.
- Simplified client rotation and error handling by removing excessive load balancing logic.
- Updated server address in `auth.go` to use dynamic binding (`:8085`).
2025-07-15 17:03:18 +08:00
Luis Pater
368796349e Add Docker support with CI/CD workflow and usage instructions
Some checks failed
docker-image / docker (push) Has been cancelled
goreleaser / goreleaser (push) Has been cancelled
- Added `.github/workflows/docker-image.yml` for automated Docker image build and push on version tags.
- Created `Dockerfile` to containerize the application.
- Updated README with instructions for running the application using Docker.
2025-07-14 16:50:51 +08:00
6 changed files with 89 additions and 52 deletions

42
.github/workflows/docker-image.yml vendored Normal file
View File

@@ -0,0 +1,42 @@
name: docker-image
on:
push:
tags:
- v*
env:
APP_NAME: CLIProxyAPI
DOCKERHUB_REPO: eceasy/cli-proxy-api
jobs:
docker:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Generate App Version
run: echo APP_VERSION=`git describe --tags --always` >> $GITHUB_ENV
- name: Build and push
uses: docker/build-push-action@v6
with:
context: .
platforms: |
linux/amd64
linux/arm64
push: true
build-args: |
APP_NAME=${{ env.APP_NAME }}
APP_VERSION=${{ env.APP_VERSION }}
tags: |
${{ env.DOCKERHUB_REPO }}:latest
${{ env.DOCKERHUB_REPO }}:${{ env.APP_VERSION }}

23
Dockerfile Normal file
View File

@@ -0,0 +1,23 @@
FROM golang:1.24-alpine AS builder
WORKDIR /app
COPY go.mod go.sum ./
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 GOOS=linux go build -o ./CLIProxyAPI ./cmd/server/
FROM alpine:3.22.0
RUN mkdir /CLIProxyAPI
COPY --from=builder ./app/CLIProxyAPI /CLIProxyAPI/CLIProxyAPI
WORKDIR /CLIProxyAPI
EXPOSE 8317
CMD ["./CLIProxyAPI"]

View File

@@ -212,6 +212,20 @@ The server will relay the `loadCodeAssist`, `onboardUser`, and `countTokens` req
> This feature only allows local access because I couldn't find a way to authenticate the requests.
> I hardcoded `127.0.0.1` into the load balancing.
## Run with Docker
Run the following command to login:
```bash
docker run --rm -p 8085:8085 -v /path/to/your/config.yaml:/CLIProxyAPI/config.yaml -v /path/to/your/auth-dir:/root/.cli-proxy-api eceasy/cli-proxy-api:latest /CLIProxyAPI/CLIProxyAPI --login
```
Run the following command to start the server:
```bash
docker run --rm -p 8317:8317 -v /path/to/your/config.yaml:/CLIProxyAPI/config.yaml -v /path/to/your/auth-dir:/root/.cli-proxy-api eceasy/cli-proxy-api:latest
```
## Contributing
Contributions are welcome! Please feel free to submit a Pull Request.

View File

@@ -78,62 +78,16 @@ func (h *APIHandlers) ClaudeMessages(c *gin.Context) {
// This loop implements a sophisticated load balancing and failover mechanism
outLoop:
for {
// Thread-safe client index rotation to distribute load evenly
// This ensures fair usage across all available clients
mutex.Lock()
startIndex := lastUsedClientIndex
currentIndex := (startIndex + 1) % len(h.cliClients)
lastUsedClientIndex = currentIndex
mutex.Unlock()
// Build a list of available clients, starting from the next client in rotation
// This implements round-robin load balancing while filtering out quota-exceeded clients
reorderedClients := make([]*client.Client, 0)
for i := 0; i < len(h.cliClients); i++ {
cliClient = h.cliClients[(startIndex+1+i)%len(h.cliClients)]
// Skip clients that have exceeded their quota for the requested model
if cliClient.IsModelQuotaExceeded(modelName) {
// Log different messages based on authentication method (API key vs account)
if cliClient.GetGenerativeLanguageAPIKey() == "" {
log.Debugf("Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.GetProjectID())
} else {
log.Debugf("Model %s is quota exceeded for generative language API Key: %s", modelName, cliClient.GetGenerativeLanguageAPIKey())
}
cliClient = nil
continue
}
reorderedClients = append(reorderedClients, cliClient)
}
// If all clients have exceeded quota, return a 429 Too Many Requests error
if len(reorderedClients) == 0 {
c.Status(429)
_, _ = fmt.Fprint(c.Writer, fmt.Sprintf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName))
var errorResponse *client.ErrorMessage
cliClient, errorResponse = h.getClient(modelName)
if errorResponse != nil {
c.Status(errorResponse.StatusCode)
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
flusher.Flush()
cliCancel()
return
}
// Attempt to acquire a lock on an available client using non-blocking TryLock
// This prevents blocking when a client is busy with another request
locked := false
for i := 0; i < len(reorderedClients); i++ {
cliClient = reorderedClients[i]
if cliClient.RequestMutex.TryLock() {
locked = true
break
}
}
// If no client is immediately available, fall back to blocking on the first client
// This ensures the request will eventually be processed
if !locked {
cliClient = h.cliClients[0]
cliClient.RequestMutex.Lock()
}
// Determine the authentication method being used by the selected client
// This affects how responses are formatted and logged
isGlAPIKey := false

View File

@@ -86,6 +86,10 @@ func (h *APIHandlers) Models(c *gin.Context) {
}
func (h *APIHandlers) getClient(modelName string) (*client.Client, *client.ErrorMessage) {
if len(h.cliClients) == 0 {
return nil, &client.ErrorMessage{StatusCode: 500, Error: fmt.Errorf("no clients available")}
}
var cliClient *client.Client
// Lock the mutex to update the last used client index

View File

@@ -169,7 +169,7 @@ func getTokenFromWeb(ctx context.Context, config *oauth2.Config) (*oauth2.Token,
errChan := make(chan error)
// Create a new HTTP server.
server := &http.Server{Addr: "localhost:8085"}
server := &http.Server{Addr: ":8085"}
config.RedirectURL = "http://localhost:8085/oauth2callback"
http.HandleFunc("/oauth2callback", func(w http.ResponseWriter, r *http.Request) {