feat(api, xai): integrate xAI Grok image models and extend API endpoints for image support

- Added new xAI Grok image models (`grok-imagine-image`, `grok-imagine-image-quality`) with high-fidelity and aspect ratio configurations.
- Extended `isSupportedImagesModel` logic to validate xAI models.
- Implemented API request builders for image generation/editing with customizable options (e.g., resolution, aspect ratio, response format).
- Enhanced `/v1/images` endpoints to handle xAI model capabilities, including response normalization and model-specific handlers.
- Updated unit tests to cover xAI model validation, request structure, and API integration.
This commit is contained in:
Luis Pater
2026-05-17 01:30:23 +08:00
parent e4c957078c
commit 2ff9e33e26
9 changed files with 779 additions and 43 deletions
+38 -2
View File
@@ -6,7 +6,11 @@ import (
"strings"
)
const codexBuiltinImageModelID = "gpt-image-2"
// Model IDs of the hard-coded (built-in) image model entries defined in this file.
const (
// codexBuiltinImageModelID is the ID used by codexBuiltinImageModelInfo.
codexBuiltinImageModelID = "gpt-image-2"
// xaiBuiltinImageModelID is the ID (and Name) used by xaiBuiltinImageModelInfo.
xaiBuiltinImageModelID = "grok-imagine-image"
// xaiBuiltinImageQualityModelID is the ID (and Name) used by
// xaiBuiltinImageQualityModelInfo, the higher-fidelity xAI image model entry.
xaiBuiltinImageQualityModelID = "grok-imagine-image-quality"
)
// staticModelsJSON mirrors the top-level structure of models.json.
type staticModelsJSON struct {
@@ -81,7 +85,7 @@ func GetAntigravityModels() []*ModelInfo {
// GetXAIModels returns the standard xAI Grok model definitions.
func GetXAIModels() []*ModelInfo {
return cloneModelInfos(getModels().XAI)
return WithXAIBuiltins(cloneModelInfos(getModels().XAI))
}
// WithCodexBuiltins injects hard-coded Codex-only model definitions that should
@@ -91,6 +95,12 @@ func WithCodexBuiltins(models []*ModelInfo) []*ModelInfo {
return upsertModelInfos(models, codexBuiltinImageModelInfo())
}
// WithXAIBuiltins adds the hard-coded xAI image model definitions to models by
// handing them to upsertModelInfos. These entries are defined in code so that
// they do not depend on remote models.json updates.
func WithXAIBuiltins(models []*ModelInfo) []*ModelInfo {
	builtins := []*ModelInfo{
		xaiBuiltinImageModelInfo(),
		xaiBuiltinImageQualityModelInfo(),
	}
	return upsertModelInfos(models, builtins...)
}
func codexBuiltinImageModelInfo() *ModelInfo {
return &ModelInfo{
ID: codexBuiltinImageModelID,
@@ -103,6 +113,32 @@ func codexBuiltinImageModelInfo() *ModelInfo {
}
}
// xaiBuiltinImageModelInfo builds a fresh ModelInfo describing the built-in
// xAI Grok image generation model. A new value is allocated on every call.
func xaiBuiltinImageModelInfo() *ModelInfo {
	info := new(ModelInfo)
	info.ID = xaiBuiltinImageModelID
	info.Object = "model"
	info.Created = 1735689600 // 2025-01-01
	info.OwnedBy = "xai"
	info.Type = "xai"
	info.DisplayName = "Grok Imagine Image"
	info.Name = xaiBuiltinImageModelID
	info.Description = "xAI Grok image generation model."
	return info
}
// xaiBuiltinImageQualityModelInfo builds a fresh ModelInfo describing the
// built-in higher-fidelity xAI Grok image generation model. A new value is
// allocated on every call.
func xaiBuiltinImageQualityModelInfo() *ModelInfo {
	info := new(ModelInfo)
	info.ID = xaiBuiltinImageQualityModelID
	info.Object = "model"
	info.Created = 1735689600 // 2025-01-01
	info.OwnedBy = "xai"
	info.Type = "xai"
	info.DisplayName = "Grok Imagine Image Quality"
	info.Name = xaiBuiltinImageQualityModelID
	info.Description = "xAI Grok higher-fidelity image generation model."
	return info
}
func upsertModelInfos(models []*ModelInfo, extras ...*ModelInfo) []*ModelInfo {
if len(extras) == 0 {
return models
+27 -4
View File
@@ -46,8 +46,7 @@
"levels": [
"low",
"medium",
"high",
"xhigh"
"high"
]
}
},
@@ -473,6 +472,30 @@
"dynamic_allowed": true
}
},
{
"id": "gemini-2.5-flash-image",
"object": "model",
"created": 1763596800,
"owned_by": "google",
"type": "gemini",
"display_name": "Gemini 2.5 Flash Image",
"name": "models/gemini-2.5-flash-image",
"version": "001",
"description": "Our state-of-the-art image generation and editing model.",
"inputTokenLimit": 1048576,
"outputTokenLimit": 65536,
"supportedGenerationMethods": [
"generateContent",
"countTokens",
"createCachedContent",
"batchGenerateContent"
],
"thinking": {
"max": 24576,
"zero_allowed": true,
"dynamic_allowed": true
}
},
{
"id": "gemini-2.5-flash-lite",
"object": "model",
@@ -1990,12 +2013,12 @@
}
},
{
"id": "gemini-3.1-pro-high",
"id": "gemini-pro-agent",
"object": "model",
"owned_by": "antigravity",
"type": "antigravity",
"display_name": "Gemini 3.1 Pro (High)",
"name": "gemini-3.1-pro-high",
"name": "gemini-pro-agent",
"description": "Gemini 3.1 Pro (High)",
"context_length": 1048576,
"max_completion_tokens": 65535,
+71
View File
@@ -27,6 +27,13 @@ import (
var xaiDataTag = []byte("data:")
// Constants used to route image requests to the xAI images endpoints.
const (
// xaiImageHandlerType is the source-format name that xaiImageEndpointPath
// requires before it treats a request as an image request.
xaiImageHandlerType = "openai-image"
// xaiImagesGenerationsPath is the path appended to the xAI base URL for
// image generation requests.
xaiImagesGenerationsPath = "/images/generations"
// xaiImagesEditsPath is the path appended to the xAI base URL for image
// edit requests.
xaiImagesEditsPath = "/images/edits"
// xaiDefaultImageEndpointPath is the fallback used when no specific image
// endpoint path is given or recognized; it is the generations path.
xaiDefaultImageEndpointPath = xaiImagesGenerationsPath
)
// XAIExecutor is a stateless executor for xAI Grok's Responses API.
type XAIExecutor struct {
cfg *config.Config
@@ -76,6 +83,10 @@ func (e *XAIExecutor) HttpRequest(ctx context.Context, auth *cliproxyauth.Auth,
}
func (e *XAIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (resp cliproxyexecutor.Response, err error) {
if endpointPath := xaiImageEndpointPath(opts); endpointPath != "" {
return e.executeImages(ctx, auth, req, endpointPath)
}
token, baseURL := xaiCreds(auth)
if baseURL == "" {
baseURL = xaiauth.DefaultAPIBaseURL
@@ -151,6 +162,51 @@ func (e *XAIExecutor) Execute(ctx context.Context, auth *cliproxyauth.Auth, req
return resp, statusErr{code: http.StatusRequestTimeout, msg: "xai stream error: stream disconnected before response.completed"}
}
// executeImages POSTs req.Payload, unmodified, to the xAI images endpoint
// selected by endpointPath and returns the raw response body and headers.
//
// An empty endpointPath falls back to xaiDefaultImageEndpointPath, and an empty
// base URL from xaiCreds falls back to xaiauth.DefaultAPIBaseURL. The whole
// response body is read into memory. A status outside the 2xx range is
// returned as a statusErr carrying the status code and the body as its message.
func (e *XAIExecutor) executeImages(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, endpointPath string) (resp cliproxyexecutor.Response, err error) {
	if endpointPath == "" {
		endpointPath = xaiDefaultImageEndpointPath
	}
	token, baseURL := xaiCreds(auth)
	if baseURL == "" {
		baseURL = xaiauth.DefaultAPIBaseURL
	}
	// Drop a single trailing slash so the joined URL has exactly one separator.
	target := strings.TrimSuffix(baseURL, "/") + endpointPath

	payloadReader := bytes.NewReader(req.Payload)
	httpReq, err := http.NewRequestWithContext(ctx, http.MethodPost, target, payloadReader)
	if err != nil {
		return resp, err
	}
	applyXAIHeaders(httpReq, auth, token, false, "")
	e.recordXAIRequest(ctx, auth, target, httpReq.Header.Clone(), req.Payload)

	client := helps.NewProxyAwareHTTPClient(ctx, e.cfg, auth, 0)
	httpResp, err := client.Do(httpReq)
	if err != nil {
		helps.RecordAPIResponseError(ctx, e.cfg, err)
		return resp, err
	}
	defer func() {
		if errClose := httpResp.Body.Close(); errClose != nil {
			log.Errorf("xai executor: close response body error: %v", errClose)
		}
	}()

	status := httpResp.StatusCode
	helps.RecordAPIResponseMetadata(ctx, e.cfg, status, httpResp.Header.Clone())

	raw, err := io.ReadAll(httpResp.Body)
	if err != nil {
		helps.RecordAPIResponseError(ctx, e.cfg, err)
		return resp, err
	}
	helps.AppendAPIResponseChunk(ctx, e.cfg, raw)

	if succeeded := status >= 200 && status < 300; !succeeded {
		contentType := httpResp.Header.Get("Content-Type")
		helps.LogWithRequestID(ctx).Debugf("request error, error status: %d, error message: %s", status, helps.SummarizeErrorBody(contentType, raw))
		return resp, statusErr{code: status, msg: string(raw)}
	}

	resp = cliproxyexecutor.Response{Payload: raw, Headers: httpResp.Header.Clone()}
	return resp, nil
}
func (e *XAIExecutor) ExecuteStream(ctx context.Context, auth *cliproxyauth.Auth, req cliproxyexecutor.Request, opts cliproxyexecutor.Options) (_ *cliproxyexecutor.StreamResult, err error) {
token, baseURL := xaiCreds(auth)
if baseURL == "" {
@@ -454,6 +510,21 @@ func xaiExecutionSessionID(req cliproxyexecutor.Request, opts cliproxyexecutor.O
return ""
}
// xaiImageEndpointPath picks the xAI images endpoint for a request.
//
// It returns "" when the request's source format is not xaiImageHandlerType,
// which callers use as the signal that this is not an image request.
// Otherwise the request path stored in opts.Metadata decides the result: a
// path ending in "/images/edits" maps to the edits endpoint, a path ending in
// "/images/generations" maps to the generations endpoint, and anything else
// (including a missing path) maps to xaiDefaultImageEndpointPath.
func xaiImageEndpointPath(opts cliproxyexecutor.Options) string {
	if opts.SourceFormat.String() != xaiImageHandlerType {
		return ""
	}

	requestPath := xaiMetadataString(opts.Metadata, cliproxyexecutor.RequestPathMetadataKey)
	switch {
	case strings.HasSuffix(requestPath, "/images/edits"):
		return xaiImagesEditsPath
	case strings.HasSuffix(requestPath, "/images/generations"):
		return xaiImagesGenerationsPath
	default:
		return xaiDefaultImageEndpointPath
	}
}
func xaiMetadataString(meta map[string]any, key string) string {
if len(meta) == 0 || key == "" {
return ""
@@ -136,3 +136,96 @@ func TestXAIExecutorOmitsUnsupportedReasoningEffort(t *testing.T) {
t.Fatalf("unsupported xAI model must omit reasoning key: %s", string(gotBody))
}
}
// TestXAIExecutorExecuteImagesUsesImagesEndpoint checks that Execute, when
// called with the "openai-image" source format and a request path ending in
// /images/generations, POSTs the payload unchanged to <base_url>/images/generations
// with the bearer token and a JSON Accept header, and returns the upstream body.
func TestXAIExecutorExecuteImagesUsesImagesEndpoint(t *testing.T) {
	var gotPath string
	var gotAuth string
	var gotAccept string
	var gotBody []byte
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		gotPath = r.URL.Path
		gotAuth = r.Header.Get("Authorization")
		gotAccept = r.Header.Get("Accept")
		var errRead error
		gotBody, errRead = io.ReadAll(r.Body)
		if errRead != nil {
			// The handler runs on a server goroutine, where t.Fatalf (FailNow)
			// must not be called. Mark the test failed and answer with an
			// error status so Execute fails on the test goroutine instead.
			t.Errorf("read body: %v", errRead)
			http.Error(w, "read body failed", http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"created":123,"data":[{"b64_json":"AA=="}]}`))
	}))
	defer server.Close()

	exec := NewXAIExecutor(&config.Config{})
	auth := &cliproxyauth.Auth{
		Provider: "xai",
		Attributes: map[string]string{
			"base_url":  server.URL,
			"auth_kind": "oauth",
		},
		Metadata: map[string]any{"access_token": "xai-token"},
	}

	resp, err := exec.Execute(context.Background(), auth, cliproxyexecutor.Request{
		Model:   "grok-imagine-image",
		Payload: []byte(`{"model":"grok-imagine-image","prompt":"draw"}`),
	}, cliproxyexecutor.Options{
		SourceFormat: sdktranslator.FromString("openai-image"),
		Metadata: map[string]any{
			cliproxyexecutor.RequestPathMetadataKey: "/v1/images/generations",
		},
	})
	if err != nil {
		t.Fatalf("Execute() error = %v", err)
	}

	// The "/v1" prefix of the incoming path must not be forwarded upstream.
	if gotPath != "/images/generations" {
		t.Fatalf("path = %q, want /images/generations", gotPath)
	}
	if gotAuth != "Bearer xai-token" {
		t.Fatalf("Authorization = %q, want Bearer xai-token", gotAuth)
	}
	if gotAccept != "application/json" {
		t.Fatalf("Accept = %q, want application/json", gotAccept)
	}
	if string(gotBody) != `{"model":"grok-imagine-image","prompt":"draw"}` {
		t.Fatalf("body = %s", string(gotBody))
	}
	if gjson.GetBytes(resp.Payload, "data.0.b64_json").String() != "AA==" {
		t.Fatalf("payload = %s", string(resp.Payload))
	}
}
// TestXAIExecutorExecuteImagesUsesEditsEndpoint checks that an "openai-image"
// request whose path ends in /images/edits is sent to <base_url>/images/edits.
func TestXAIExecutorExecuteImagesUsesEditsEndpoint(t *testing.T) {
	var requestedPath string
	handler := func(w http.ResponseWriter, r *http.Request) {
		requestedPath = r.URL.Path
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"created":123,"data":[{"url":"https://x.ai/image.png"}]}`))
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	executor := NewXAIExecutor(&config.Config{})
	auth := &cliproxyauth.Auth{
		Provider:   "xai",
		Attributes: map[string]string{"base_url": srv.URL},
		Metadata:   map[string]any{"access_token": "xai-token"},
	}
	request := cliproxyexecutor.Request{
		Model:   "grok-imagine-image",
		Payload: []byte(`{"model":"grok-imagine-image","prompt":"edit","image":{"type":"image_url","url":"https://example.com/a.png"}}`),
	}
	options := cliproxyexecutor.Options{
		SourceFormat: sdktranslator.FromString("openai-image"),
		Metadata: map[string]any{
			cliproxyexecutor.RequestPathMetadataKey: "/v1/images/edits",
		},
	}

	if _, err := executor.Execute(context.Background(), auth, request, options); err != nil {
		t.Fatalf("Execute() error = %v", err)
	}
	if requestedPath != "/images/edits" {
		t.Fatalf("path = %q, want /images/edits", requestedPath)
	}
}