Merge pull request #3484 from yavon007/main

Add reasoning_effort to usage event payloads
This commit is contained in:
Luis Pater
2026-05-20 12:34:40 +08:00
committed by GitHub
12 changed files with 268 additions and 51 deletions
+21 -15
View File
@@ -48,6 +48,10 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
} }
apiKey := strings.TrimSpace(record.APIKey) apiKey := strings.TrimSpace(record.APIKey)
requestID := strings.TrimSpace(internallogging.GetRequestID(ctx)) requestID := strings.TrimSpace(internallogging.GetRequestID(ctx))
reasoningEffort := strings.TrimSpace(record.ReasoningEffort)
if reasoningEffort == "" {
reasoningEffort = coreusage.ReasoningEffortFromContext(ctx)
}
tokens := tokenStats{ tokens := tokenStats{
InputTokens: record.Detail.InputTokens, InputTokens: record.Detail.InputTokens,
@@ -83,14 +87,15 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
} }
payload, err := json.Marshal(queuedUsageDetail{ payload, err := json.Marshal(queuedUsageDetail{
requestDetail: detail, requestDetail: detail,
Provider: provider, Provider: provider,
Model: modelName, Model: modelName,
Alias: aliasName, Alias: aliasName,
Endpoint: resolveEndpoint(ctx), Endpoint: resolveEndpoint(ctx),
AuthType: authType, AuthType: authType,
APIKey: apiKey, APIKey: apiKey,
RequestID: requestID, RequestID: requestID,
ReasoningEffort: reasoningEffort,
}) })
if err != nil { if err != nil {
return return
@@ -100,13 +105,14 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
type queuedUsageDetail struct { type queuedUsageDetail struct {
requestDetail requestDetail
Provider string `json:"provider"` Provider string `json:"provider"`
Model string `json:"model"` Model string `json:"model"`
Alias string `json:"alias"` Alias string `json:"alias"`
Endpoint string `json:"endpoint"` Endpoint string `json:"endpoint"`
AuthType string `json:"auth_type"` AuthType string `json:"auth_type"`
APIKey string `json:"api_key"` APIKey string `json:"api_key"`
RequestID string `json:"request_id"` RequestID string `json:"request_id"`
ReasoningEffort string `json:"reasoning_effort"`
} }
type requestDetail struct { type requestDetail struct {
+11 -9
View File
@@ -25,15 +25,16 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
plugin := &usageQueuePlugin{} plugin := &usageQueuePlugin{}
plugin.HandleUsage(ctx, coreusage.Record{ plugin.HandleUsage(ctx, coreusage.Record{
Provider: "openai", Provider: "openai",
Model: "gpt-5.4", Model: "gpt-5.4",
Alias: "client-gpt", Alias: "client-gpt",
APIKey: "test-key", APIKey: "test-key",
AuthIndex: "0", AuthIndex: "0",
AuthType: "apikey", AuthType: "apikey",
Source: "user@example.com", Source: "user@example.com",
RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC), ReasoningEffort: "medium",
Latency: 1500 * time.Millisecond, RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC),
Latency: 1500 * time.Millisecond,
Detail: coreusage.Detail{ Detail: coreusage.Detail{
InputTokens: 10, InputTokens: 10,
OutputTokens: 20, OutputTokens: 20,
@@ -51,6 +52,7 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
requireStringField(t, payload, "auth_type", "apikey") requireStringField(t, payload, "auth_type", "apikey")
requireMissingField(t, payload, "user_api_key") requireMissingField(t, payload, "user_api_key")
requireStringField(t, payload, "request_id", "ctx-request-id") requireStringField(t, payload, "request_id", "ctx-request-id")
requireStringField(t, payload, "reasoning_effort", "medium")
requireHeaderField(t, payload, "response_headers", "X-Upstream-Request-Id", []string{"upstream-req-1"}) requireHeaderField(t, payload, "response_headers", "X-Upstream-Request-Id", []string{"upstream-req-1"})
requireHeaderField(t, payload, "response_headers", "Retry-After", []string{"30"}) requireHeaderField(t, payload, "response_headers", "Retry-After", []string{"30"})
requireBoolField(t, payload, "failed", false) requireBoolField(t, payload, "failed", false)
@@ -26,6 +26,7 @@ type UsageReporter struct {
authType string authType string
apiKey string apiKey string
source string source string
reasoning string
requestedAt time.Time requestedAt time.Time
once sync.Once once sync.Once
} }
@@ -44,6 +45,7 @@ func NewUsageReporter(ctx context.Context, provider, model string, auth *cliprox
apiKey: apiKey, apiKey: apiKey,
source: resolveUsageSource(auth, apiKey), source: resolveUsageSource(auth, apiKey),
authType: resolveUsageAuthType(auth), authType: resolveUsageAuthType(auth),
reasoning: usage.ReasoningEffortFromContext(ctx),
} }
if auth != nil { if auth != nil {
reporter.authID = auth.ID reporter.authID = auth.ID
@@ -156,19 +158,20 @@ func (r *UsageReporter) buildRecordForModel(model string, detail usage.Detail, f
return usage.Record{Model: model, Detail: detail, Failed: failed, Fail: fail} return usage.Record{Model: model, Detail: detail, Failed: failed, Fail: fail}
} }
return usage.Record{ return usage.Record{
Provider: r.provider, Provider: r.provider,
Model: model, Model: model,
Alias: r.alias, Alias: r.alias,
Source: r.source, Source: r.source,
APIKey: r.apiKey, APIKey: r.apiKey,
AuthID: r.authID, AuthID: r.authID,
AuthIndex: r.authIndex, AuthIndex: r.authIndex,
AuthType: r.authType, AuthType: r.authType,
RequestedAt: r.requestedAt, ReasoningEffort: r.reasoning,
Latency: r.latency(), RequestedAt: r.requestedAt,
Failed: failed, Latency: r.latency(),
Fail: fail, Failed: failed,
Detail: detail, Fail: fail,
Detail: detail,
} }
} }
@@ -159,6 +159,16 @@ func TestUsageReporterBuildRecordIncludesRequestedModelAlias(t *testing.T) {
} }
} }
func TestUsageReporterBuildRecordIncludesReasoningEffort(t *testing.T) {
ctx := usage.WithReasoningEffort(context.Background(), "medium")
reporter := NewUsageReporter(ctx, "openai", "gpt-5.4", nil)
record := reporter.buildRecord(usage.Detail{TotalTokens: 3}, false)
if record.ReasoningEffort != "medium" {
t.Fatalf("reasoning effort = %q, want %q", record.ReasoningEffort, "medium")
}
}
func TestUsageReporterBuildAdditionalModelRecordSkipsZeroTokens(t *testing.T) { func TestUsageReporterBuildAdditionalModelRecordSkipsZeroTokens(t *testing.T) {
reporter := &UsageReporter{ reporter := &UsageReporter{
provider: "codex", provider: "codex",
+50
View File
@@ -339,6 +339,56 @@ func hasThinkingConfig(config ThinkingConfig) bool {
return config.Mode != ModeBudget || config.Budget != 0 || config.Level != "" return config.Mode != ModeBudget || config.Budget != 0 || config.Level != ""
} }
// ExtractReasoningEffort derives a canonical reasoning_effort label for usage
// logging from a request.
//
// A model suffix that resolves to a non-empty effort wins over anything in the
// body, mirroring the suffix priority used by ApplyThinking. Otherwise the
// provider name is trimmed and lowercased and the thinking configuration is
// read from body for that provider. When no configuration is found and the
// provider is "openai" or "openai-response", the Codex-style configuration is
// tried as a fallback. The result is "" when no effort can be determined.
func ExtractReasoningEffort(body []byte, provider, model string) string {
	if fromSuffix := reasoningEffortFromSuffix(ParseSuffix(model)); fromSuffix != "" {
		return fromSuffix
	}

	normalized := strings.ToLower(strings.TrimSpace(provider))
	cfg := extractThinkingConfig(body, normalized)
	if hasThinkingConfig(cfg) {
		return reasoningEffortFromConfig(cfg)
	}

	// Nothing found in the provider's own format: OpenAI-flavoured requests
	// get a second chance through the Codex extractor.
	if normalized == "openai-response" || normalized == "openai" {
		cfg = extractCodexConfig(body)
	}
	return reasoningEffortFromConfig(cfg)
}
// reasoningEffortFromSuffix converts a parsed model suffix into a
// reasoning_effort label. It returns "" when the model carried no suffix or
// when the suffix does not map to an effort.
func reasoningEffortFromSuffix(suffix SuffixResult) string {
	if suffix.HasSuffix {
		cfg := parseSuffixToConfig(suffix.RawSuffix, "", suffix.ModelName)
		return reasoningEffortFromConfig(cfg)
	}
	return ""
}
// reasoningEffortFromConfig maps a ThinkingConfig onto a reasoning_effort
// label.
//
// ModeNone and ModeAuto map to their corresponding level names, ModeLevel
// yields the trimmed, lowercased level, and ModeBudget is translated through
// ConvertBudgetToLevel. The result is "" when the config is empty, the budget
// cannot be converted, or the mode is not one of the above.
func reasoningEffortFromConfig(config ThinkingConfig) string {
	effort := ""
	if !hasThinkingConfig(config) {
		return effort
	}

	switch config.Mode {
	case ModeNone:
		effort = string(LevelNone)
	case ModeAuto:
		effort = string(LevelAuto)
	case ModeLevel:
		effort = strings.ToLower(strings.TrimSpace(string(config.Level)))
	case ModeBudget:
		if level, ok := ConvertBudgetToLevel(config.Budget); ok {
			effort = level
		}
	}
	return effort
}
// extractClaudeConfig extracts thinking configuration from Claude format request body. // extractClaudeConfig extracts thinking configuration from Claude format request body.
// //
// Claude API format: // Claude API format:
@@ -0,0 +1,31 @@
package thinking
import "testing"
// TestExtractReasoningEffortUsesSuffixOverBody checks that the effort in the
// model suffix is reported even when the body requests a different one.
func TestExtractReasoningEffortUsesSuffixOverBody(t *testing.T) {
	const want = "high"

	body := []byte(`{"reasoning_effort":"low"}`)
	if got := ExtractReasoningEffort(body, "openai", "gpt-5.4(high)"); got != want {
		t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, want)
	}
}
// TestExtractReasoningEffortConvertsBudgetToLevel checks that a Claude-style
// token budget in the body is reported as a named effort level.
func TestExtractReasoningEffortConvertsBudgetToLevel(t *testing.T) {
	const want = "medium"

	body := []byte(`{"thinking":{"type":"enabled","budget_tokens":8192}}`)
	if got := ExtractReasoningEffort(body, "claude", "claude-sonnet-4-5"); got != want {
		t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, want)
	}
}
// TestExtractReasoningEffortSupportsOpenAIResponses checks that the nested
// reasoning.effort field is read for the "openai-response" provider.
func TestExtractReasoningEffortSupportsOpenAIResponses(t *testing.T) {
	const want = "medium"

	body := []byte(`{"reasoning":{"effort":"medium"}}`)
	if got := ExtractReasoningEffort(body, "openai-response", "gpt-5.4"); got != want {
		t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, want)
	}
}
// TestExtractReasoningEffortMissingConfigIsEmpty checks that a request with
// neither a model suffix nor a thinking setting in the body yields "".
func TestExtractReasoningEffortMissingConfigIsEmpty(t *testing.T) {
	body := []byte(`{"messages":[{"role":"user","content":"hi"}]}`)

	got := ExtractReasoningEffort(body, "openai", "gpt-5.4")
	if len(got) != 0 {
		t.Fatalf("ExtractReasoningEffort() = %q, want empty", got)
	}
}
+14
View File
@@ -231,6 +231,17 @@ func requestExecutionMetadata(ctx context.Context) map[string]any {
return meta return meta
} }
// setReasoningEffortMetadata records the request's reasoning effort in meta
// under coreexecutor.ReasoningEffortMetadataKey.
//
// The effort is extracted from rawJSON and model for the given handlerType.
// meta is left untouched when it is nil or when no effort can be extracted.
func setReasoningEffortMetadata(meta map[string]any, handlerType, model string, rawJSON []byte) {
	if meta == nil {
		return
	}
	if effort := thinking.ExtractReasoningEffort(rawJSON, handlerType, model); effort != "" {
		meta[coreexecutor.ReasoningEffortMetadataKey] = effort
	}
}
// headersFromContext extracts the original HTTP request headers from the gin context // headersFromContext extracts the original HTTP request headers from the gin context
// embedded in the provided context. This allows session affinity selectors to read // embedded in the provided context. This allows session affinity selectors to read
// client headers like X-Amp-Thread-Id. // client headers like X-Amp-Thread-Id.
@@ -550,6 +561,7 @@ func (h *BaseAPIHandler) executeWithAuthManager(ctx context.Context, handlerType
} }
reqMeta := requestExecutionMetadata(ctx) reqMeta := requestExecutionMetadata(ctx)
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
payload := rawJSON payload := rawJSON
if len(payload) == 0 { if len(payload) == 0 {
payload = nil payload = nil
@@ -598,6 +610,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
} }
reqMeta := requestExecutionMetadata(ctx) reqMeta := requestExecutionMetadata(ctx)
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
payload := rawJSON payload := rawJSON
if len(payload) == 0 { if len(payload) == 0 {
payload = nil payload = nil
@@ -659,6 +672,7 @@ func (h *BaseAPIHandler) executeStreamWithAuthManager(ctx context.Context, handl
} }
reqMeta := requestExecutionMetadata(ctx) reqMeta := requestExecutionMetadata(ctx)
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
payload := rawJSON payload := rawJSON
if len(payload) == 0 { if len(payload) == 0 {
payload = nil payload = nil
@@ -18,3 +18,23 @@ func TestRequestExecutionMetadataIncludesExecutionSessionWithoutIdempotencyKey(t
t.Fatalf("unexpected idempotency key in metadata: %v", meta[idempotencyKeyMetadataKey]) t.Fatalf("unexpected idempotency key in metadata: %v", meta[idempotencyKeyMetadataKey])
} }
} }
// TestSetReasoningEffortMetadataUsesSuffixOverBody checks that the metadata
// entry reflects the model suffix rather than the effort named in the body.
func TestSetReasoningEffortMetadataUsesSuffixOverBody(t *testing.T) {
	const want = "high"

	meta := map[string]any{}
	body := []byte(`{"reasoning_effort":"low"}`)
	setReasoningEffortMetadata(meta, "openai", "gpt-5.4(high)", body)

	got := meta[coreexecutor.ReasoningEffortMetadataKey]
	if got != want {
		t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, want)
	}
}
// TestSetReasoningEffortMetadataSupportsOpenAIResponses checks that the
// nested reasoning.effort field of an "openai-response" request ends up in
// the metadata map.
func TestSetReasoningEffortMetadataSupportsOpenAIResponses(t *testing.T) {
	const want = "medium"

	meta := map[string]any{}
	body := []byte(`{"reasoning":{"effort":"medium"}}`)
	setReasoningEffortMetadata(meta, "openai-response", "gpt-5.4", body)

	got := meta[coreexecutor.ReasoningEffortMetadataKey]
	if got != want {
		t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, want)
	}
}
+23 -1
View File
@@ -1632,7 +1632,11 @@ func hasRequestedModelMetadata(meta map[string]any) bool {
func contextWithRequestedModelAlias(ctx context.Context, opts cliproxyexecutor.Options, fallback string) context.Context { func contextWithRequestedModelAlias(ctx context.Context, opts cliproxyexecutor.Options, fallback string) context.Context {
alias := requestedModelAliasFromOptions(opts, fallback) alias := requestedModelAliasFromOptions(opts, fallback)
return coreusage.WithRequestedModelAlias(ctx, alias) ctx = coreusage.WithRequestedModelAlias(ctx, alias)
if effort := reasoningEffortFromOptions(opts); effort != "" {
ctx = coreusage.WithReasoningEffort(ctx, effort)
}
return ctx
} }
func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback string) string { func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback string) string {
@@ -1660,6 +1664,24 @@ func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback stri
} }
} }
// reasoningEffortFromOptions reads the reasoning effort stored in the
// execution options' metadata under ReasoningEffortMetadataKey.
//
// String and []byte values are returned with surrounding whitespace removed.
// A missing key, a nil value, empty metadata, or a value of any other type
// yields "".
func reasoningEffortFromOptions(opts cliproxyexecutor.Options) string {
	// Indexing an empty or nil map yields a nil value, which matches no case
	// below, so absent and nil entries need no separate guard.
	switch effort := opts.Metadata[cliproxyexecutor.ReasoningEffortMetadataKey].(type) {
	case string:
		return strings.TrimSpace(effort)
	case []byte:
		return strings.TrimSpace(string(effort))
	}
	return ""
}
func pinnedAuthIDFromMetadata(meta map[string]any) string { func pinnedAuthIDFromMetadata(meta map[string]any) string {
if len(meta) == 0 { if len(meta) == 0 {
return "" return ""
+25
View File
@@ -0,0 +1,25 @@
package auth
import (
"context"
"testing"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/executor"
coreusage "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/usage"
)
func TestContextWithRequestedModelAliasIncludesReasoningEffort(t *testing.T) {
ctx := contextWithRequestedModelAlias(context.Background(), cliproxyexecutor.Options{
Metadata: map[string]any{
cliproxyexecutor.RequestedModelMetadataKey: "client-model",
cliproxyexecutor.ReasoningEffortMetadataKey: "medium",
},
}, "fallback-model")
if got := coreusage.RequestedModelAliasFromContext(ctx); got != "client-model" {
t.Fatalf("requested model alias = %q, want %q", got, "client-model")
}
if got := coreusage.ReasoningEffortFromContext(ctx); got != "medium" {
t.Fatalf("reasoning effort = %q, want %q", got, "medium")
}
}
+3
View File
@@ -17,6 +17,9 @@ const RequestPathMetadataKey = "request_path"
// DisallowFreeAuthMetadataKey instructs auth selection to skip known free-tier credentials. // DisallowFreeAuthMetadataKey instructs auth selection to skip known free-tier credentials.
const DisallowFreeAuthMetadataKey = "disallow_free_auth" const DisallowFreeAuthMetadataKey = "disallow_free_auth"
// ReasoningEffortMetadataKey is the execution-metadata key under which the
// client-requested reasoning effort is stored for usage logs.
const ReasoningEffortMetadataKey = "reasoning_effort"
const ( const (
// PinnedAuthMetadataKey locks execution to a specific auth ID. // PinnedAuthMetadataKey locks execution to a specific auth ID.
PinnedAuthMetadataKey = "pinned_auth_id" PinnedAuthMetadataKey = "pinned_auth_id"
+44 -13
View File
@@ -12,19 +12,21 @@ import (
// Record contains the usage statistics captured for a single provider request. // Record contains the usage statistics captured for a single provider request.
type Record struct { type Record struct {
Provider string Provider string
Model string Model string
Alias string Alias string
APIKey string APIKey string
AuthID string AuthID string
AuthIndex string AuthIndex string
AuthType string AuthType string
Source string Source string
RequestedAt time.Time // ReasoningEffort stores the client-requested thinking level for request event logs.
Latency time.Duration ReasoningEffort string
Failed bool RequestedAt time.Time
Fail Failure Latency time.Duration
Detail Detail Failed bool
Fail Failure
Detail Detail
// ResponseHeaders stores a snapshot of upstream response headers for usage sinks. // ResponseHeaders stores a snapshot of upstream response headers for usage sinks.
ResponseHeaders http.Header ResponseHeaders http.Header
} }
@@ -47,6 +49,7 @@ type Detail struct {
} }
type requestedModelAliasContextKey struct{} type requestedModelAliasContextKey struct{}
// reasoningEffortContextKey is the unexported context key type used for the
// reasoning effort value.
type reasoningEffortContextKey struct{}
// WithRequestedModelAlias stores the client-requested model name for usage sinks. // WithRequestedModelAlias stores the client-requested model name for usage sinks.
func WithRequestedModelAlias(ctx context.Context, alias string) context.Context { func WithRequestedModelAlias(ctx context.Context, alias string) context.Context {
@@ -76,6 +79,34 @@ func RequestedModelAliasFromContext(ctx context.Context) string {
} }
} }
// WithReasoningEffort returns a context carrying the client-requested
// reasoning effort for usage sinks.
//
// A nil ctx is replaced with context.Background(). The effort is trimmed
// before being stored; when nothing remains after trimming, the context is
// returned without a value attached.
func WithReasoningEffort(ctx context.Context, effort string) context.Context {
	if ctx == nil {
		ctx = context.Background()
	}
	if trimmed := strings.TrimSpace(effort); trimmed != "" {
		return context.WithValue(ctx, reasoningEffortContextKey{}, trimmed)
	}
	return ctx
}
// ReasoningEffortFromContext returns the client-requested reasoning effort
// stored in ctx, with surrounding whitespace removed.
//
// It returns "" when ctx is nil, when no effort is stored, or when the stored
// value is neither a string nor a []byte.
func ReasoningEffortFromContext(ctx context.Context) string {
	if ctx == nil {
		return ""
	}
	switch stored := ctx.Value(reasoningEffortContextKey{}).(type) {
	case string:
		return strings.TrimSpace(stored)
	case []byte:
		return strings.TrimSpace(string(stored))
	}
	return ""
}
// Plugin consumes usage records emitted by the proxy runtime. // Plugin consumes usage records emitted by the proxy runtime.
type Plugin interface { type Plugin interface {
HandleUsage(ctx context.Context, record Record) HandleUsage(ctx context.Context, record Record)