Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f49a530c1a | ||
|
|
368796349e |
42
.github/workflows/docker-image.yml
vendored
Normal file
42
.github/workflows/docker-image.yml
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
name: docker-image
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- v*
|
||||
|
||||
env:
|
||||
APP_NAME: CLIProxyAPI
|
||||
DOCKERHUB_REPO: eceasy/cli-proxy-api
|
||||
|
||||
jobs:
|
||||
docker:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
- name: Generate App Version
|
||||
run: echo APP_VERSION=`git describe --tags --always` >> $GITHUB_ENV
|
||||
- name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
platforms: |
|
||||
linux/amd64
|
||||
linux/arm64
|
||||
push: true
|
||||
build-args: |
|
||||
APP_NAME=${{ env.APP_NAME }}
|
||||
APP_VERSION=${{ env.APP_VERSION }}
|
||||
tags: |
|
||||
${{ env.DOCKERHUB_REPO }}:latest
|
||||
${{ env.DOCKERHUB_REPO }}:${{ env.APP_VERSION }}
|
||||
23
Dockerfile
Normal file
23
Dockerfile
Normal file
@@ -0,0 +1,23 @@
|
||||
FROM golang:1.24-alpine AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY go.mod go.sum ./
|
||||
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o ./CLIProxyAPI ./cmd/server/
|
||||
|
||||
FROM alpine:3.22.0
|
||||
|
||||
RUN mkdir /CLIProxyAPI
|
||||
|
||||
COPY --from=builder ./app/CLIProxyAPI /CLIProxyAPI/CLIProxyAPI
|
||||
|
||||
WORKDIR /CLIProxyAPI
|
||||
|
||||
EXPOSE 8317
|
||||
|
||||
CMD ["./CLIProxyAPI"]
|
||||
14
README.md
14
README.md
@@ -212,6 +212,20 @@ The server will relay the `loadCodeAssist`, `onboardUser`, and `countTokens` req
|
||||
> This feature only allows local access because I couldn't find a way to authenticate the requests.
|
||||
> I hardcoded `127.0.0.1` into the load balancing.
|
||||
|
||||
## Run with Docker
|
||||
|
||||
Run the following command to login:
|
||||
|
||||
```bash
|
||||
docker run --rm -p 8085:8085 -v /path/to/your/config.yaml:/CLIProxyAPI/config.yaml -v /path/to/your/auth-dir:/root/.cli-proxy-api eceasy/cli-proxy-api:latest /CLIProxyAPI/CLIProxyAPI --login
|
||||
```
|
||||
|
||||
Run the following command to start the server:
|
||||
|
||||
```bash
|
||||
docker run --rm -p 8317:8317 -v /path/to/your/config.yaml:/CLIProxyAPI/config.yaml -v /path/to/your/auth-dir:/root/.cli-proxy-api eceasy/cli-proxy-api:latest
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
Contributions are welcome! Please feel free to submit a Pull Request.
|
||||
|
||||
@@ -78,62 +78,16 @@ func (h *APIHandlers) ClaudeMessages(c *gin.Context) {
|
||||
// This loop implements a sophisticated load balancing and failover mechanism
|
||||
outLoop:
|
||||
for {
|
||||
// Thread-safe client index rotation to distribute load evenly
|
||||
// This ensures fair usage across all available clients
|
||||
mutex.Lock()
|
||||
startIndex := lastUsedClientIndex
|
||||
currentIndex := (startIndex + 1) % len(h.cliClients)
|
||||
lastUsedClientIndex = currentIndex
|
||||
mutex.Unlock()
|
||||
|
||||
// Build a list of available clients, starting from the next client in rotation
|
||||
// This implements round-robin load balancing while filtering out quota-exceeded clients
|
||||
reorderedClients := make([]*client.Client, 0)
|
||||
for i := 0; i < len(h.cliClients); i++ {
|
||||
cliClient = h.cliClients[(startIndex+1+i)%len(h.cliClients)]
|
||||
|
||||
// Skip clients that have exceeded their quota for the requested model
|
||||
if cliClient.IsModelQuotaExceeded(modelName) {
|
||||
// Log different messages based on authentication method (API key vs account)
|
||||
if cliClient.GetGenerativeLanguageAPIKey() == "" {
|
||||
log.Debugf("Model %s is quota exceeded for account %s, project id: %s", modelName, cliClient.GetEmail(), cliClient.GetProjectID())
|
||||
} else {
|
||||
log.Debugf("Model %s is quota exceeded for generative language API Key: %s", modelName, cliClient.GetGenerativeLanguageAPIKey())
|
||||
}
|
||||
|
||||
cliClient = nil
|
||||
continue
|
||||
}
|
||||
reorderedClients = append(reorderedClients, cliClient)
|
||||
}
|
||||
|
||||
// If all clients have exceeded quota, return a 429 Too Many Requests error
|
||||
if len(reorderedClients) == 0 {
|
||||
c.Status(429)
|
||||
_, _ = fmt.Fprint(c.Writer, fmt.Sprintf(`{"error":{"code":429,"message":"All the models of '%s' are quota exceeded","status":"RESOURCE_EXHAUSTED"}}`, modelName))
|
||||
var errorResponse *client.ErrorMessage
|
||||
cliClient, errorResponse = h.getClient(modelName)
|
||||
if errorResponse != nil {
|
||||
c.Status(errorResponse.StatusCode)
|
||||
_, _ = fmt.Fprint(c.Writer, errorResponse.Error)
|
||||
flusher.Flush()
|
||||
cliCancel()
|
||||
return
|
||||
}
|
||||
|
||||
// Attempt to acquire a lock on an available client using non-blocking TryLock
|
||||
// This prevents blocking when a client is busy with another request
|
||||
locked := false
|
||||
for i := 0; i < len(reorderedClients); i++ {
|
||||
cliClient = reorderedClients[i]
|
||||
if cliClient.RequestMutex.TryLock() {
|
||||
locked = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If no client is immediately available, fall back to blocking on the first client
|
||||
// This ensures the request will eventually be processed
|
||||
if !locked {
|
||||
cliClient = h.cliClients[0]
|
||||
cliClient.RequestMutex.Lock()
|
||||
}
|
||||
|
||||
// Determine the authentication method being used by the selected client
|
||||
// This affects how responses are formatted and logged
|
||||
isGlAPIKey := false
|
||||
|
||||
@@ -86,6 +86,10 @@ func (h *APIHandlers) Models(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (h *APIHandlers) getClient(modelName string) (*client.Client, *client.ErrorMessage) {
|
||||
if len(h.cliClients) == 0 {
|
||||
return nil, &client.ErrorMessage{StatusCode: 500, Error: fmt.Errorf("no clients available")}
|
||||
}
|
||||
|
||||
var cliClient *client.Client
|
||||
|
||||
// Lock the mutex to update the last used client index
|
||||
|
||||
@@ -169,7 +169,7 @@ func getTokenFromWeb(ctx context.Context, config *oauth2.Config) (*oauth2.Token,
|
||||
errChan := make(chan error)
|
||||
|
||||
// Create a new HTTP server.
|
||||
server := &http.Server{Addr: "localhost:8085"}
|
||||
server := &http.Server{Addr: ":8085"}
|
||||
config.RedirectURL = "http://localhost:8085/oauth2callback"
|
||||
|
||||
http.HandleFunc("/oauth2callback", func(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
Reference in New Issue
Block a user