Add reasoning effort to usage events

This commit is contained in:
yavon007
2026-05-19 22:10:48 +08:00
parent bb5ac40a67
commit 0de0ad0d36
12 changed files with 268 additions and 51 deletions
+6
View File
@@ -48,6 +48,10 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
} }
apiKey := strings.TrimSpace(record.APIKey) apiKey := strings.TrimSpace(record.APIKey)
requestID := strings.TrimSpace(internallogging.GetRequestID(ctx)) requestID := strings.TrimSpace(internallogging.GetRequestID(ctx))
reasoningEffort := strings.TrimSpace(record.ReasoningEffort)
if reasoningEffort == "" {
reasoningEffort = coreusage.ReasoningEffortFromContext(ctx)
}
tokens := tokenStats{ tokens := tokenStats{
InputTokens: record.Detail.InputTokens, InputTokens: record.Detail.InputTokens,
@@ -91,6 +95,7 @@ func (p *usageQueuePlugin) HandleUsage(ctx context.Context, record coreusage.Rec
AuthType: authType, AuthType: authType,
APIKey: apiKey, APIKey: apiKey,
RequestID: requestID, RequestID: requestID,
ReasoningEffort: reasoningEffort,
}) })
if err != nil { if err != nil {
return return
@@ -107,6 +112,7 @@ type queuedUsageDetail struct {
AuthType string `json:"auth_type"` AuthType string `json:"auth_type"`
APIKey string `json:"api_key"` APIKey string `json:"api_key"`
RequestID string `json:"request_id"` RequestID string `json:"request_id"`
ReasoningEffort string `json:"reasoning_effort"`
} }
type requestDetail struct { type requestDetail struct {
+2
View File
@@ -32,6 +32,7 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
AuthIndex: "0", AuthIndex: "0",
AuthType: "apikey", AuthType: "apikey",
Source: "user@example.com", Source: "user@example.com",
ReasoningEffort: "medium",
RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC), RequestedAt: time.Date(2026, 4, 25, 0, 0, 0, 0, time.UTC),
Latency: 1500 * time.Millisecond, Latency: 1500 * time.Millisecond,
Detail: coreusage.Detail{ Detail: coreusage.Detail{
@@ -51,6 +52,7 @@ func TestUsageQueuePluginPayloadIncludesStableFieldsAndSuccess(t *testing.T) {
requireStringField(t, payload, "auth_type", "apikey") requireStringField(t, payload, "auth_type", "apikey")
requireMissingField(t, payload, "user_api_key") requireMissingField(t, payload, "user_api_key")
requireStringField(t, payload, "request_id", "ctx-request-id") requireStringField(t, payload, "request_id", "ctx-request-id")
requireStringField(t, payload, "reasoning_effort", "medium")
requireHeaderField(t, payload, "response_headers", "X-Upstream-Request-Id", []string{"upstream-req-1"}) requireHeaderField(t, payload, "response_headers", "X-Upstream-Request-Id", []string{"upstream-req-1"})
requireHeaderField(t, payload, "response_headers", "Retry-After", []string{"30"}) requireHeaderField(t, payload, "response_headers", "Retry-After", []string{"30"})
requireBoolField(t, payload, "failed", false) requireBoolField(t, payload, "failed", false)
@@ -26,6 +26,7 @@ type UsageReporter struct {
authType string authType string
apiKey string apiKey string
source string source string
reasoning string
requestedAt time.Time requestedAt time.Time
once sync.Once once sync.Once
} }
@@ -44,6 +45,7 @@ func NewUsageReporter(ctx context.Context, provider, model string, auth *cliprox
apiKey: apiKey, apiKey: apiKey,
source: resolveUsageSource(auth, apiKey), source: resolveUsageSource(auth, apiKey),
authType: resolveUsageAuthType(auth), authType: resolveUsageAuthType(auth),
reasoning: usage.ReasoningEffortFromContext(ctx),
} }
if auth != nil { if auth != nil {
reporter.authID = auth.ID reporter.authID = auth.ID
@@ -164,6 +166,7 @@ func (r *UsageReporter) buildRecordForModel(model string, detail usage.Detail, f
AuthID: r.authID, AuthID: r.authID,
AuthIndex: r.authIndex, AuthIndex: r.authIndex,
AuthType: r.authType, AuthType: r.authType,
ReasoningEffort: r.reasoning,
RequestedAt: r.requestedAt, RequestedAt: r.requestedAt,
Latency: r.latency(), Latency: r.latency(),
Failed: failed, Failed: failed,
@@ -159,6 +159,16 @@ func TestUsageReporterBuildRecordIncludesRequestedModelAlias(t *testing.T) {
} }
} }
func TestUsageReporterBuildRecordIncludesReasoningEffort(t *testing.T) {
ctx := usage.WithReasoningEffort(context.Background(), "medium")
reporter := NewUsageReporter(ctx, "openai", "gpt-5.4", nil)
record := reporter.buildRecord(usage.Detail{TotalTokens: 3}, false)
if record.ReasoningEffort != "medium" {
t.Fatalf("reasoning effort = %q, want %q", record.ReasoningEffort, "medium")
}
}
func TestUsageReporterBuildAdditionalModelRecordSkipsZeroTokens(t *testing.T) { func TestUsageReporterBuildAdditionalModelRecordSkipsZeroTokens(t *testing.T) {
reporter := &UsageReporter{ reporter := &UsageReporter{
provider: "codex", provider: "codex",
+50
View File
@@ -339,6 +339,56 @@ func hasThinkingConfig(config ThinkingConfig) bool {
return config.Mode != ModeBudget || config.Budget != 0 || config.Level != "" return config.Mode != ModeBudget || config.Budget != 0 || config.Level != ""
} }
// ExtractReasoningEffort reports the request's thinking setting as a canonical
// reasoning_effort label for usage logging.
//
// Resolution order:
//  1. A thinking suffix on model wins over any body field (the same priority
//     ApplyThinking uses).
//  2. Otherwise the body is parsed with extractThinkingConfig for the
//     lower-cased, trimmed provider name.
//  3. If that yields no thinking config and the provider is "openai" or
//     "openai-response", extractCodexConfig is tried on the body instead.
//
// An empty string is returned when no thinking setting is found.
func ExtractReasoningEffort(body []byte, provider, model string) string {
	if fromSuffix := reasoningEffortFromSuffix(ParseSuffix(model)); fromSuffix != "" {
		return fromSuffix
	}

	normalized := strings.ToLower(strings.TrimSpace(provider))
	cfg := extractThinkingConfig(body, normalized)
	if hasThinkingConfig(cfg) {
		return reasoningEffortFromConfig(cfg)
	}

	// Both OpenAI handler types share the same fallback extractor.
	if normalized == "openai-response" || normalized == "openai" {
		cfg = extractCodexConfig(body)
	}
	return reasoningEffortFromConfig(cfg)
}
// reasoningEffortFromSuffix converts a parsed model suffix into a reasoning
// effort label. It returns "" when the model name carried no suffix.
func reasoningEffortFromSuffix(suffix SuffixResult) string {
	if suffix.HasSuffix {
		cfg := parseSuffixToConfig(suffix.RawSuffix, "", suffix.ModelName)
		return reasoningEffortFromConfig(cfg)
	}
	return ""
}
// reasoningEffortFromConfig renders a ThinkingConfig as a reasoning effort
// label. It returns "" when the config carries no thinking setting, when the
// mode is not one of the handled modes, or when a budget cannot be converted
// to a level.
func reasoningEffortFromConfig(config ThinkingConfig) string {
	if !hasThinkingConfig(config) {
		return ""
	}

	label := ""
	switch config.Mode {
	case ModeLevel:
		// Levels are normalised to lower case without surrounding whitespace.
		label = strings.ToLower(strings.TrimSpace(string(config.Level)))
	case ModeBudget:
		if converted, ok := ConvertBudgetToLevel(config.Budget); ok {
			label = converted
		}
	case ModeAuto:
		label = string(LevelAuto)
	case ModeNone:
		label = string(LevelNone)
	}
	return label
}
// extractClaudeConfig extracts thinking configuration from Claude format request body. // extractClaudeConfig extracts thinking configuration from Claude format request body.
// //
// Claude API format: // Claude API format:
@@ -0,0 +1,31 @@
package thinking
import "testing"
// TestExtractReasoningEffortUsesSuffixOverBody checks that a "(high)" model
// suffix takes precedence over a conflicting reasoning_effort body field.
func TestExtractReasoningEffortUsesSuffixOverBody(t *testing.T) {
	const want = "high"
	body := []byte(`{"reasoning_effort":"low"}`)

	if got := ExtractReasoningEffort(body, "openai", "gpt-5.4(high)"); got != want {
		t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, want)
	}
}
// TestExtractReasoningEffortConvertsBudgetToLevel checks that a Claude-style
// thinking budget of 8192 tokens is reported as the "medium" level.
func TestExtractReasoningEffortConvertsBudgetToLevel(t *testing.T) {
	const want = "medium"
	body := []byte(`{"thinking":{"type":"enabled","budget_tokens":8192}}`)

	if got := ExtractReasoningEffort(body, "claude", "claude-sonnet-4-5"); got != want {
		t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, want)
	}
}
// TestExtractReasoningEffortSupportsOpenAIResponses checks that the nested
// reasoning.effort field is read for the "openai-response" provider.
func TestExtractReasoningEffortSupportsOpenAIResponses(t *testing.T) {
	const want = "medium"
	body := []byte(`{"reasoning":{"effort":"medium"}}`)

	if got := ExtractReasoningEffort(body, "openai-response", "gpt-5.4"); got != want {
		t.Fatalf("ExtractReasoningEffort() = %q, want %q", got, want)
	}
}
// TestExtractReasoningEffortMissingConfigIsEmpty checks that a request body
// without any thinking fields produces an empty reasoning effort.
func TestExtractReasoningEffortMissingConfigIsEmpty(t *testing.T) {
	body := []byte(`{"messages":[{"role":"user","content":"hi"}]}`)

	got := ExtractReasoningEffort(body, "openai", "gpt-5.4")
	if len(got) > 0 {
		t.Fatalf("ExtractReasoningEffort() = %q, want empty", got)
	}
}
+14
View File
@@ -231,6 +231,17 @@ func requestExecutionMetadata(ctx context.Context) map[string]any {
return meta return meta
} }
// setReasoningEffortMetadata records the request's reasoning effort in meta
// under coreexecutor.ReasoningEffortMetadataKey. The effort is derived from
// the raw request body, the handler type and the model name. Nothing is
// written when meta is nil or when no effort can be extracted.
func setReasoningEffortMetadata(meta map[string]any, handlerType, model string, rawJSON []byte) {
	if meta == nil {
		return
	}
	if effort := thinking.ExtractReasoningEffort(rawJSON, handlerType, model); effort != "" {
		meta[coreexecutor.ReasoningEffortMetadataKey] = effort
	}
}
// headersFromContext extracts the original HTTP request headers from the gin context // headersFromContext extracts the original HTTP request headers from the gin context
// embedded in the provided context. This allows session affinity selectors to read // embedded in the provided context. This allows session affinity selectors to read
// client headers like X-Amp-Thread-Id. // client headers like X-Amp-Thread-Id.
@@ -550,6 +561,7 @@ func (h *BaseAPIHandler) executeWithAuthManager(ctx context.Context, handlerType
} }
reqMeta := requestExecutionMetadata(ctx) reqMeta := requestExecutionMetadata(ctx)
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
payload := rawJSON payload := rawJSON
if len(payload) == 0 { if len(payload) == 0 {
payload = nil payload = nil
@@ -598,6 +610,7 @@ func (h *BaseAPIHandler) ExecuteCountWithAuthManager(ctx context.Context, handle
} }
reqMeta := requestExecutionMetadata(ctx) reqMeta := requestExecutionMetadata(ctx)
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
payload := rawJSON payload := rawJSON
if len(payload) == 0 { if len(payload) == 0 {
payload = nil payload = nil
@@ -659,6 +672,7 @@ func (h *BaseAPIHandler) executeStreamWithAuthManager(ctx context.Context, handl
} }
reqMeta := requestExecutionMetadata(ctx) reqMeta := requestExecutionMetadata(ctx)
reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName reqMeta[coreexecutor.RequestedModelMetadataKey] = modelName
setReasoningEffortMetadata(reqMeta, handlerType, normalizedModel, rawJSON)
payload := rawJSON payload := rawJSON
if len(payload) == 0 { if len(payload) == 0 {
payload = nil payload = nil
@@ -18,3 +18,23 @@ func TestRequestExecutionMetadataIncludesExecutionSessionWithoutIdempotencyKey(t
t.Fatalf("unexpected idempotency key in metadata: %v", meta[idempotencyKeyMetadataKey]) t.Fatalf("unexpected idempotency key in metadata: %v", meta[idempotencyKeyMetadataKey])
} }
} }
// TestSetReasoningEffortMetadataUsesSuffixOverBody checks that the metadata
// entry reflects the model suffix rather than the body's reasoning_effort.
func TestSetReasoningEffortMetadataUsesSuffixOverBody(t *testing.T) {
	const want = "high"
	meta := map[string]any{}

	setReasoningEffortMetadata(meta, "openai", "gpt-5.4(high)", []byte(`{"reasoning_effort":"low"}`))

	got := meta[coreexecutor.ReasoningEffortMetadataKey]
	if got != want {
		t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, want)
	}
}
// TestSetReasoningEffortMetadataSupportsOpenAIResponses checks that the
// nested reasoning.effort field is stored in the metadata for the
// "openai-response" handler type.
func TestSetReasoningEffortMetadataSupportsOpenAIResponses(t *testing.T) {
	const want = "medium"
	meta := map[string]any{}

	setReasoningEffortMetadata(meta, "openai-response", "gpt-5.4", []byte(`{"reasoning":{"effort":"medium"}}`))

	got := meta[coreexecutor.ReasoningEffortMetadataKey]
	if got != want {
		t.Fatalf("ReasoningEffortMetadataKey = %v, want %q", got, want)
	}
}
+23 -1
View File
@@ -1632,7 +1632,11 @@ func hasRequestedModelMetadata(meta map[string]any) bool {
func contextWithRequestedModelAlias(ctx context.Context, opts cliproxyexecutor.Options, fallback string) context.Context { func contextWithRequestedModelAlias(ctx context.Context, opts cliproxyexecutor.Options, fallback string) context.Context {
alias := requestedModelAliasFromOptions(opts, fallback) alias := requestedModelAliasFromOptions(opts, fallback)
return coreusage.WithRequestedModelAlias(ctx, alias) ctx = coreusage.WithRequestedModelAlias(ctx, alias)
if effort := reasoningEffortFromOptions(opts); effort != "" {
ctx = coreusage.WithReasoningEffort(ctx, effort)
}
return ctx
} }
func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback string) string { func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback string) string {
@@ -1660,6 +1664,24 @@ func requestedModelAliasFromOptions(opts cliproxyexecutor.Options, fallback stri
} }
} }
// reasoningEffortFromOptions reads the reasoning effort stored in the
// execution options' metadata under ReasoningEffortMetadataKey. String and
// []byte values are returned with surrounding whitespace removed; a missing
// key, a nil value or any other value type yields "".
func reasoningEffortFromOptions(opts cliproxyexecutor.Options) string {
	// Indexing an empty or nil map, or a missing key, yields a nil value,
	// which matches neither case below.
	stored := opts.Metadata[cliproxyexecutor.ReasoningEffortMetadataKey]
	switch effort := stored.(type) {
	case string:
		return strings.TrimSpace(effort)
	case []byte:
		return strings.TrimSpace(string(effort))
	}
	return ""
}
func pinnedAuthIDFromMetadata(meta map[string]any) string { func pinnedAuthIDFromMetadata(meta map[string]any) string {
if len(meta) == 0 { if len(meta) == 0 {
return "" return ""
+25
View File
@@ -0,0 +1,25 @@
package auth
import (
"context"
"testing"
cliproxyexecutor "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/executor"
coreusage "github.com/router-for-me/CLIProxyAPI/v7/sdk/cliproxy/usage"
)
func TestContextWithRequestedModelAliasIncludesReasoningEffort(t *testing.T) {
ctx := contextWithRequestedModelAlias(context.Background(), cliproxyexecutor.Options{
Metadata: map[string]any{
cliproxyexecutor.RequestedModelMetadataKey: "client-model",
cliproxyexecutor.ReasoningEffortMetadataKey: "medium",
},
}, "fallback-model")
if got := coreusage.RequestedModelAliasFromContext(ctx); got != "client-model" {
t.Fatalf("requested model alias = %q, want %q", got, "client-model")
}
if got := coreusage.ReasoningEffortFromContext(ctx); got != "medium" {
t.Fatalf("reasoning effort = %q, want %q", got, "medium")
}
}
+3
View File
@@ -17,6 +17,9 @@ const RequestPathMetadataKey = "request_path"
// DisallowFreeAuthMetadataKey instructs auth selection to skip known free-tier credentials. // DisallowFreeAuthMetadataKey instructs auth selection to skip known free-tier credentials.
const DisallowFreeAuthMetadataKey = "disallow_free_auth" const DisallowFreeAuthMetadataKey = "disallow_free_auth"
// ReasoningEffortMetadataKey is the execution-metadata key under which the
// client-requested reasoning effort is stored so it can be included in usage
// logs.
const ReasoningEffortMetadataKey = "reasoning_effort"
const ( const (
// PinnedAuthMetadataKey locks execution to a specific auth ID. // PinnedAuthMetadataKey locks execution to a specific auth ID.
PinnedAuthMetadataKey = "pinned_auth_id" PinnedAuthMetadataKey = "pinned_auth_id"
+31
View File
@@ -20,6 +20,8 @@ type Record struct {
AuthIndex string AuthIndex string
AuthType string AuthType string
Source string Source string
// ReasoningEffort stores the client-requested thinking level for request event logs.
ReasoningEffort string
RequestedAt time.Time RequestedAt time.Time
Latency time.Duration Latency time.Duration
Failed bool Failed bool
@@ -47,6 +49,7 @@ type Detail struct {
} }
type requestedModelAliasContextKey struct{} type requestedModelAliasContextKey struct{}
// reasoningEffortContextKey is the unexported context key type used to store
// the client-requested reasoning effort on a context.Context.
type reasoningEffortContextKey struct{}
// WithRequestedModelAlias stores the client-requested model name for usage sinks. // WithRequestedModelAlias stores the client-requested model name for usage sinks.
func WithRequestedModelAlias(ctx context.Context, alias string) context.Context { func WithRequestedModelAlias(ctx context.Context, alias string) context.Context {
@@ -76,6 +79,34 @@ func RequestedModelAliasFromContext(ctx context.Context) string {
} }
} }
// WithReasoningEffort returns a context carrying the client-requested
// reasoning effort for usage sinks. A nil ctx is replaced with
// context.Background(). The effort is stored with surrounding whitespace
// removed; if it is empty after trimming, the context is returned without a
// stored value.
func WithReasoningEffort(ctx context.Context, effort string) context.Context {
	if ctx == nil {
		ctx = context.Background()
	}
	if trimmed := strings.TrimSpace(effort); trimmed != "" {
		return context.WithValue(ctx, reasoningEffortContextKey{}, trimmed)
	}
	return ctx
}
// ReasoningEffortFromContext returns the client-requested reasoning effort
// stored in ctx, with surrounding whitespace removed. It returns "" when ctx
// is nil, when no value is stored, or when the stored value is neither a
// string nor a []byte.
func ReasoningEffortFromContext(ctx context.Context) string {
	if ctx == nil {
		return ""
	}

	stored := ctx.Value(reasoningEffortContextKey{})
	if text, ok := stored.(string); ok {
		return strings.TrimSpace(text)
	}
	if raw, ok := stored.([]byte); ok {
		return strings.TrimSpace(string(raw))
	}
	return ""
}
// Plugin consumes usage records emitted by the proxy runtime. // Plugin consumes usage records emitted by the proxy runtime.
type Plugin interface { type Plugin interface {
HandleUsage(ctx context.Context, record Record) HandleUsage(ctx context.Context, record Record)