feat(api): implement protocol multiplexer and Redis queue for usage integration
- Added `protocol_multiplexer.go`, enabling support for both HTTP and Redis protocols on a single listener.
- Introduced `redis_queue_protocol.go` to handle Redis-compatible RESP commands for queue management.
- Integrated the `redisqueue` package, supporting in-memory queuing with expiration pruning.
- Updated server initialization to manage a shared listener and multiplex connections.
- Adjusted `Handler` to adopt `AuthenticateManagementKey` for modular key validation, supporting both HTTP and Redis flows.
This commit is contained in:
@@ -0,0 +1,145 @@
|
||||
package redisqueue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
internallogging "github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
||||
internalusage "github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
|
||||
coreusage "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
|
||||
)
|
||||
|
||||
func init() {
|
||||
coreusage.RegisterPlugin(&usageQueuePlugin{})
|
||||
}
|
||||
|
||||
// usageQueuePlugin is the stateless receiver of HandleUsage; init passes an
// instance to coreusage.RegisterPlugin.
type usageQueuePlugin struct{}
|
||||
|
||||
func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Record) {
|
||||
if p == nil {
|
||||
return
|
||||
}
|
||||
if !Enabled() || !internalusage.StatisticsEnabled() {
|
||||
return
|
||||
}
|
||||
|
||||
timestamp := record.RequestedAt
|
||||
if timestamp.IsZero() {
|
||||
timestamp = time.Now()
|
||||
}
|
||||
|
||||
modelName := strings.TrimSpace(record.Model)
|
||||
if modelName == "" {
|
||||
modelName = "unknown"
|
||||
}
|
||||
provider := strings.TrimSpace(record.Provider)
|
||||
if provider == "" {
|
||||
provider = "unknown"
|
||||
}
|
||||
authType := strings.TrimSpace(record.AuthType)
|
||||
if authType == "" {
|
||||
authType = "unknown"
|
||||
}
|
||||
apiKey := strings.TrimSpace(record.APIKey)
|
||||
requestID := strings.TrimSpace(internallogging.GetRequestID(ctx))
|
||||
if requestID == "" {
|
||||
if ginCtx, ok := ctx.Value("gin").(*gin.Context); ok && ginCtx != nil {
|
||||
requestID = strings.TrimSpace(internallogging.GetGinRequestID(ginCtx))
|
||||
}
|
||||
}
|
||||
|
||||
tokens := internalusage.TokenStats{
|
||||
InputTokens: record.Detail.InputTokens,
|
||||
OutputTokens: record.Detail.OutputTokens,
|
||||
ReasoningTokens: record.Detail.ReasoningTokens,
|
||||
CachedTokens: record.Detail.CachedTokens,
|
||||
TotalTokens: record.Detail.TotalTokens,
|
||||
}
|
||||
if tokens.TotalTokens == 0 {
|
||||
tokens.TotalTokens = tokens.InputTokens + tokens.OutputTokens + tokens.ReasoningTokens
|
||||
}
|
||||
if tokens.TotalTokens == 0 {
|
||||
tokens.TotalTokens = tokens.InputTokens + tokens.OutputTokens + tokens.ReasoningTokens + tokens.CachedTokens
|
||||
}
|
||||
|
||||
failed := record.Failed
|
||||
if !failed {
|
||||
failed = !resolveSuccess(ctx)
|
||||
}
|
||||
|
||||
detail := internalusage.RequestDetail{
|
||||
Timestamp: timestamp,
|
||||
LatencyMs: record.Latency.Milliseconds(),
|
||||
Source: record.Source,
|
||||
AuthIndex: record.AuthIndex,
|
||||
Tokens: tokens,
|
||||
Failed: failed,
|
||||
}
|
||||
|
||||
payload, err := json.Marshal(queuedUsageDetail{
|
||||
RequestDetail: detail,
|
||||
Provider: provider,
|
||||
Model: modelName,
|
||||
Endpoint: resolveEndpoint(ctx),
|
||||
AuthType: authType,
|
||||
APIKey: apiKey,
|
||||
RequestID: requestID,
|
||||
})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
Enqueue(payload)
|
||||
}
|
||||
|
||||
type queuedUsageDetail struct {
|
||||
internalusage.RequestDetail
|
||||
Provider string `json:"provider"`
|
||||
Model string `json:"model"`
|
||||
Endpoint string `json:"endpoint"`
|
||||
AuthType string `json:"auth_type"`
|
||||
APIKey string `json:"api_key"`
|
||||
RequestID string `json:"request_id"`
|
||||
}
|
||||
|
||||
func resolveSuccess(ctx context.Context) bool {
|
||||
if ctx == nil {
|
||||
return true
|
||||
}
|
||||
ginCtx, ok := ctx.Value("gin").(*gin.Context)
|
||||
if !ok || ginCtx == nil {
|
||||
return true
|
||||
}
|
||||
status := ginCtx.Writer.Status()
|
||||
if status == 0 {
|
||||
return true
|
||||
}
|
||||
return status < http.StatusBadRequest
|
||||
}
|
||||
|
||||
func resolveEndpoint(ctx context.Context) string {
|
||||
if ctx == nil {
|
||||
return ""
|
||||
}
|
||||
ginCtx, ok := ctx.Value("gin").(*gin.Context)
|
||||
if !ok || ginCtx == nil || ginCtx.Request == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
path := strings.TrimSpace(ginCtx.FullPath())
|
||||
if path == "" && ginCtx.Request.URL != nil {
|
||||
path = strings.TrimSpace(ginCtx.Request.URL.Path)
|
||||
}
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
method := strings.TrimSpace(ginCtx.Request.Method)
|
||||
if method == "" {
|
||||
return path
|
||||
}
|
||||
return method + " " + path
|
||||
}
|
||||
@@ -0,0 +1,160 @@
|
||||
package redisqueue
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
internallogging "github.com/router-for-me/CLIProxyAPI/v6/internal/logging"
|
||||
internalusage "github.com/router-for-me/CLIProxyAPI/v6/internal/usage"
|
||||
coreusage "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
|
||||
)
|
||||
|
||||
func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
|
||||
withEnabledQueue(t, func() {
|
||||
ginCtx := newTestGinContext(t, http.MethodPost, "/v1/chat/completions", http.StatusOK)
|
||||
internallogging.SetGinRequestID(ginCtx, "gin-request-id-ignored")
|
||||
ctx := context.WithValue(internallogging.WithRequestID(context.Background(), "ctx-request-id"), "gin", ginCtx)
|
||||
|
||||
plugin := &usageQueuePlugin{}
|
||||
plugin.HandleUsage(ctx, coreusage.Record{
|
||||
Provider: "openai",
|
||||
Model: "gpt-5.4",
|
||||
APIKey: "test-key",
|
||||
AuthIndex: "0",
|
||||
AuthType: "apikey",
|
||||
Source: "user@example.com",
|
||||
RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC),
|
||||
Latency: 1500 * time.Millisecond,
|
||||
Detail: coreusage.Detail{
|
||||
InputTokens: 10,
|
||||
OutputTokens: 20,
|
||||
TotalTokens: 30,
|
||||
},
|
||||
})
|
||||
|
||||
payload := popSinglePayload(t)
|
||||
requireStringField(t, payload, "provider", "openai")
|
||||
requireStringField(t, payload, "model", "gpt-5.4")
|
||||
requireStringField(t, payload, "endpoint", "POST /v1/chat/completions")
|
||||
requireStringField(t, payload, "auth_type", "apikey")
|
||||
requireStringField(t, payload, "request_id", "ctx-request-id")
|
||||
requireBoolField(t, payload, "failed", false)
|
||||
})
|
||||
}
|
||||
|
||||
func TestUsageQueuePluginPayloadIncludesStableFieldsAndFailureAndGinRequestID(t *testing.T) {
|
||||
withEnabledQueue(t, func() {
|
||||
ginCtx := newTestGinContext(t, http.MethodGet, "/v1/responses", http.StatusInternalServerError)
|
||||
internallogging.SetGinRequestID(ginCtx, "gin-request-id")
|
||||
ctx := context.WithValue(context.Background(), "gin", ginCtx)
|
||||
|
||||
plugin := &usageQueuePlugin{}
|
||||
plugin.HandleUsage(ctx, coreusage.Record{
|
||||
Provider: "openai",
|
||||
Model: "gpt-5.4-mini",
|
||||
APIKey: "test-key",
|
||||
AuthIndex: "0",
|
||||
AuthType: "apikey",
|
||||
Source: "user@example.com",
|
||||
RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC),
|
||||
Latency: 2500 * time.Millisecond,
|
||||
Detail: coreusage.Detail{
|
||||
InputTokens: 10,
|
||||
OutputTokens: 20,
|
||||
TotalTokens: 30,
|
||||
},
|
||||
})
|
||||
|
||||
payload := popSinglePayload(t)
|
||||
requireStringField(t, payload, "provider", "openai")
|
||||
requireStringField(t, payload, "model", "gpt-5.4-mini")
|
||||
requireStringField(t, payload, "endpoint", "GET /v1/responses")
|
||||
requireStringField(t, payload, "auth_type", "apikey")
|
||||
requireStringField(t, payload, "request_id", "gin-request-id")
|
||||
requireBoolField(t, payload, "failed", true)
|
||||
})
|
||||
}
|
||||
|
||||
func withEnabledQueue(t *testing.T, fn func()) {
|
||||
t.Helper()
|
||||
|
||||
prevQueueEnabled := Enabled()
|
||||
prevStatsEnabled := internalusage.StatisticsEnabled()
|
||||
|
||||
SetEnabled(false)
|
||||
SetEnabled(true)
|
||||
internalusage.SetStatisticsEnabled(true)
|
||||
|
||||
defer func() {
|
||||
SetEnabled(false)
|
||||
SetEnabled(prevQueueEnabled)
|
||||
internalusage.SetStatisticsEnabled(prevStatsEnabled)
|
||||
}()
|
||||
|
||||
fn()
|
||||
}
|
||||
|
||||
func newTestGinContext(t *testing.T, method, path string, status int) *gin.Context {
|
||||
t.Helper()
|
||||
|
||||
gin.SetMode(gin.TestMode)
|
||||
recorder := httptest.NewRecorder()
|
||||
ginCtx, _ := gin.CreateTestContext(recorder)
|
||||
ginCtx.Request = httptest.NewRequest(method, "http://example.com"+path, nil)
|
||||
if status != 0 {
|
||||
ginCtx.Status(status)
|
||||
}
|
||||
return ginCtx
|
||||
}
|
||||
|
||||
func popSinglePayload(t *testing.T) map[string]json.RawMessage {
|
||||
t.Helper()
|
||||
|
||||
items := PopOldest(10)
|
||||
if len(items) != 1 {
|
||||
t.Fatalf("PopOldest() items = %d, want 1", len(items))
|
||||
}
|
||||
|
||||
var payload map[string]json.RawMessage
|
||||
if err := json.Unmarshal(items[0], &payload); err != nil {
|
||||
t.Fatalf("unmarshal payload: %v", err)
|
||||
}
|
||||
return payload
|
||||
}
|
||||
|
||||
// requireStringField fails the test unless payload contains key and its value
// decodes to the JSON string want.
func requireStringField(t *testing.T, payload map[string]json.RawMessage, key, want string) {
	t.Helper()

	encoded, present := payload[key]
	if !present {
		t.Fatalf("payload missing %q", key)
	}

	var actual string
	switch err := json.Unmarshal(encoded, &actual); {
	case err != nil:
		t.Fatalf("unmarshal %q: %v", key, err)
	case actual != want:
		t.Fatalf("%s = %q, want %q", key, actual, want)
	}
}
|
||||
|
||||
// requireBoolField fails the test unless payload contains key and its value
// decodes to the JSON boolean want.
func requireBoolField(t *testing.T, payload map[string]json.RawMessage, key string, want bool) {
	t.Helper()

	encoded, present := payload[key]
	if !present {
		t.Fatalf("payload missing %q", key)
	}

	var actual bool
	switch err := json.Unmarshal(encoded, &actual); {
	case err != nil:
		t.Fatalf("unmarshal %q: %v", key, err)
	case actual != want:
		t.Fatalf("%s = %t, want %t", key, actual, want)
	}
}
|
||||
@@ -0,0 +1,133 @@
|
||||
package redisqueue
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
)
|
||||
|
||||
// retentionWindow is how long an entry stays eligible for popping; pruneLocked
// skips entries enqueued before now minus this window.
const retentionWindow = time.Minute
|
||||
|
||||
// queueItem is one queued payload together with the time it was enqueued.
type queueItem struct {
	// enqueuedAt is compared against the retention cutoff when pruning.
	enqueuedAt time.Time
	// payload holds the queued bytes.
	payload []byte
}
|
||||
|
||||
type queue struct {
|
||||
mu sync.Mutex
|
||||
items []queueItem
|
||||
head int
|
||||
}
|
||||
|
||||
var (
|
||||
enabled atomic.Bool
|
||||
global queue
|
||||
)
|
||||
|
||||
func SetEnabled(value bool) {
|
||||
enabled.Store(value)
|
||||
if !value {
|
||||
global.clear()
|
||||
}
|
||||
}
|
||||
|
||||
func Enabled() bool {
|
||||
return enabled.Load()
|
||||
}
|
||||
|
||||
func Enqueue(payload []byte) {
|
||||
if !Enabled() {
|
||||
return
|
||||
}
|
||||
if len(payload) == 0 {
|
||||
return
|
||||
}
|
||||
global.enqueue(payload)
|
||||
}
|
||||
|
||||
func PopOldest(count int) [][]byte {
|
||||
if !Enabled() {
|
||||
return nil
|
||||
}
|
||||
if count <= 0 {
|
||||
return nil
|
||||
}
|
||||
return global.popOldest(count)
|
||||
}
|
||||
|
||||
func (q *queue) clear() {
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
q.items = nil
|
||||
q.head = 0
|
||||
}
|
||||
|
||||
func (q *queue) enqueue(payload []byte) {
|
||||
now := time.Now()
|
||||
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
|
||||
q.pruneLocked(now)
|
||||
q.items = append(q.items, queueItem{
|
||||
enqueuedAt: now,
|
||||
payload: append([]byte(nil), payload...),
|
||||
})
|
||||
q.maybeCompactLocked()
|
||||
}
|
||||
|
||||
func (q *queue) popOldest(count int) [][]byte {
|
||||
now := time.Now()
|
||||
|
||||
q.mu.Lock()
|
||||
defer q.mu.Unlock()
|
||||
|
||||
q.pruneLocked(now)
|
||||
available := len(q.items) - q.head
|
||||
if available <= 0 {
|
||||
q.items = nil
|
||||
q.head = 0
|
||||
return nil
|
||||
}
|
||||
if count > available {
|
||||
count = available
|
||||
}
|
||||
|
||||
out := make([][]byte, 0, count)
|
||||
for i := 0; i < count; i++ {
|
||||
item := q.items[q.head+i]
|
||||
out = append(out, item.payload)
|
||||
}
|
||||
q.head += count
|
||||
q.maybeCompactLocked()
|
||||
return out
|
||||
}
|
||||
|
||||
func (q *queue) pruneLocked(now time.Time) {
|
||||
if q.head >= len(q.items) {
|
||||
q.items = nil
|
||||
q.head = 0
|
||||
return
|
||||
}
|
||||
|
||||
cutoff := now.Add(-retentionWindow)
|
||||
for q.head < len(q.items) && q.items[q.head].enqueuedAt.Before(cutoff) {
|
||||
q.head++
|
||||
}
|
||||
}
|
||||
|
||||
func (q *queue) maybeCompactLocked() {
|
||||
if q.head == 0 {
|
||||
return
|
||||
}
|
||||
if q.head >= len(q.items) {
|
||||
q.items = nil
|
||||
q.head = 0
|
||||
return
|
||||
}
|
||||
if q.head < 1024 && q.head*2 < len(q.items) {
|
||||
return
|
||||
}
|
||||
q.items = append([]queueItem(nil), q.items[q.head:]...)
|
||||
q.head = 0
|
||||
}
|
||||
Reference in New Issue
Block a user