feat(translator): add partial and full image generation support in Codex-GPT and Codex-Gemini flows
- Introduced `LastImageHashByItemID` in Codex-GPT and `LastImageHashByID` in Codex-Gemini for deduplication of generated images.
- Added support for handling `partial_image` and `image_generation_call` types, with inline data embedding for Gemini and URL payload conversion for GPT.
- Extended unit tests to verify image handling in both streaming and non-streaming modes.
This commit is contained in:
@@ -7,6 +7,8 @@ package gemini
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
translatorcommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/common"
|
translatorcommon "github.com/router-for-me/CLIProxyAPI/v6/internal/translator/common"
|
||||||
@@ -25,6 +27,7 @@ type ConvertCodexResponseToGeminiParams struct {
|
|||||||
ResponseID string
|
ResponseID string
|
||||||
LastStorageOutput []byte
|
LastStorageOutput []byte
|
||||||
HasOutputTextDelta bool
|
HasOutputTextDelta bool
|
||||||
|
LastImageHashByID map[string][32]byte
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConvertCodexResponseToGemini converts Codex streaming response format to Gemini format.
|
// ConvertCodexResponseToGemini converts Codex streaming response format to Gemini format.
|
||||||
@@ -48,6 +51,7 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
|
|||||||
ResponseID: "",
|
ResponseID: "",
|
||||||
LastStorageOutput: nil,
|
LastStorageOutput: nil,
|
||||||
HasOutputTextDelta: false,
|
HasOutputTextDelta: false,
|
||||||
|
LastImageHashByID: make(map[string][32]byte),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,10 +78,63 @@ func ConvertCodexResponseToGemini(_ context.Context, modelName string, originalR
|
|||||||
template, _ = sjson.SetBytes(template, "responseId", params.ResponseID)
|
template, _ = sjson.SetBytes(template, "responseId", params.ResponseID)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if typeStr == "response.image_generation_call.partial_image" {
|
||||||
|
itemID := rootResult.Get("item_id").String()
|
||||||
|
b64 := rootResult.Get("partial_image_b64").String()
|
||||||
|
if b64 == "" {
|
||||||
|
return [][]byte{}
|
||||||
|
}
|
||||||
|
if itemID != "" {
|
||||||
|
if params.LastImageHashByID == nil {
|
||||||
|
params.LastImageHashByID = make(map[string][32]byte)
|
||||||
|
}
|
||||||
|
hash := sha256.Sum256([]byte(b64))
|
||||||
|
if last, ok := params.LastImageHashByID[itemID]; ok && last == hash {
|
||||||
|
return [][]byte{}
|
||||||
|
}
|
||||||
|
params.LastImageHashByID[itemID] = hash
|
||||||
|
}
|
||||||
|
|
||||||
|
outputFormat := rootResult.Get("output_format").String()
|
||||||
|
mimeType := mimeTypeFromCodexOutputFormat(outputFormat)
|
||||||
|
|
||||||
|
part := []byte(`{"inlineData":{"data":"","mimeType":""}}`)
|
||||||
|
part, _ = sjson.SetBytes(part, "inlineData.data", b64)
|
||||||
|
part, _ = sjson.SetBytes(part, "inlineData.mimeType", mimeType)
|
||||||
|
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
|
||||||
|
return [][]byte{template}
|
||||||
|
}
|
||||||
|
|
||||||
// Handle function call completion
|
// Handle function call completion
|
||||||
if typeStr == "response.output_item.done" {
|
if typeStr == "response.output_item.done" {
|
||||||
itemResult := rootResult.Get("item")
|
itemResult := rootResult.Get("item")
|
||||||
itemType := itemResult.Get("type").String()
|
itemType := itemResult.Get("type").String()
|
||||||
|
if itemType == "image_generation_call" {
|
||||||
|
itemID := itemResult.Get("id").String()
|
||||||
|
b64 := itemResult.Get("result").String()
|
||||||
|
if b64 == "" {
|
||||||
|
return [][]byte{}
|
||||||
|
}
|
||||||
|
if itemID != "" {
|
||||||
|
if params.LastImageHashByID == nil {
|
||||||
|
params.LastImageHashByID = make(map[string][32]byte)
|
||||||
|
}
|
||||||
|
hash := sha256.Sum256([]byte(b64))
|
||||||
|
if last, ok := params.LastImageHashByID[itemID]; ok && last == hash {
|
||||||
|
return [][]byte{}
|
||||||
|
}
|
||||||
|
params.LastImageHashByID[itemID] = hash
|
||||||
|
}
|
||||||
|
|
||||||
|
outputFormat := itemResult.Get("output_format").String()
|
||||||
|
mimeType := mimeTypeFromCodexOutputFormat(outputFormat)
|
||||||
|
|
||||||
|
part := []byte(`{"inlineData":{"data":"","mimeType":""}}`)
|
||||||
|
part, _ = sjson.SetBytes(part, "inlineData.data", b64)
|
||||||
|
part, _ = sjson.SetBytes(part, "inlineData.mimeType", mimeType)
|
||||||
|
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
|
||||||
|
return [][]byte{template}
|
||||||
|
}
|
||||||
if itemType == "function_call" {
|
if itemType == "function_call" {
|
||||||
// Create function call part
|
// Create function call part
|
||||||
functionCall := []byte(`{"functionCall":{"name":"","args":{}}}`)
|
functionCall := []byte(`{"functionCall":{"name":"","args":{}}}`)
|
||||||
@@ -270,6 +327,20 @@ func ConvertCodexResponseToGeminiNonStream(_ context.Context, modelName string,
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case "image_generation_call":
|
||||||
|
flushPendingFunctionCalls()
|
||||||
|
b64 := value.Get("result").String()
|
||||||
|
if b64 == "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
outputFormat := value.Get("output_format").String()
|
||||||
|
mimeType := mimeTypeFromCodexOutputFormat(outputFormat)
|
||||||
|
|
||||||
|
part := []byte(`{"inlineData":{"data":"","mimeType":""}}`)
|
||||||
|
part, _ = sjson.SetBytes(part, "inlineData.data", b64)
|
||||||
|
part, _ = sjson.SetBytes(part, "inlineData.mimeType", mimeType)
|
||||||
|
template, _ = sjson.SetRawBytes(template, "candidates.0.content.parts.-1", part)
|
||||||
|
|
||||||
case "function_call":
|
case "function_call":
|
||||||
// Collect function call for potential merging with consecutive ones
|
// Collect function call for potential merging with consecutive ones
|
||||||
hasToolCall = true
|
hasToolCall = true
|
||||||
@@ -342,3 +413,24 @@ func buildReverseMapFromGeminiOriginal(original []byte) map[string]string {
|
|||||||
func GeminiTokenCount(ctx context.Context, count int64) []byte {
|
func GeminiTokenCount(ctx context.Context, count int64) []byte {
|
||||||
return translatorcommon.GeminiTokenCountJSON(count)
|
return translatorcommon.GeminiTokenCountJSON(count)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// mimeTypeFromCodexOutputFormat maps a Codex image `output_format` value to a
// MIME type suitable for a Gemini inlineData part.
//
// Rules, in order:
//   - empty (or whitespace-only) input yields "image/png";
//   - a value that already contains "/" is treated as a MIME type and returned
//     with surrounding whitespace removed;
//   - the short names png, jpg/jpeg, webp and gif (case-insensitive) map to the
//     corresponding image/* type;
//   - anything else falls back to "image/png".
func mimeTypeFromCodexOutputFormat(outputFormat string) string {
	// Trim first so values such as " jpeg " are not misclassified as unknown
	// and silently reported as PNG.
	format := strings.TrimSpace(outputFormat)
	if format == "" {
		return "image/png"
	}
	if strings.Contains(format, "/") {
		return format
	}
	switch strings.ToLower(format) {
	case "png":
		return "image/png"
	case "jpg", "jpeg":
		return "image/jpeg"
	case "webp":
		return "image/webp"
	case "gif":
		return "image/gif"
	default:
		return "image/png"
	}
}
|
||||||
|
|||||||
@@ -33,3 +33,79 @@ func TestConvertCodexResponseToGemini_StreamEmptyOutputUsesOutputItemDoneMessage
|
|||||||
t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", outputs)
|
t.Fatalf("expected fallback content from response.output_item.done message; outputs=%q", outputs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConvertCodexResponseToGemini_StreamPartialImageEmitsInlineData(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
originalRequest := []byte(`{"tools":[]}`)
|
||||||
|
var param any
|
||||||
|
|
||||||
|
chunk := []byte(`data: {"type":"response.image_generation_call.partial_image","item_id":"ig_123","output_format":"png","partial_image_b64":"aGVsbG8=","partial_image_index":0}`)
|
||||||
|
out := ConvertCodexResponseToGemini(ctx, "gemini-2.5-pro", originalRequest, nil, chunk, ¶m)
|
||||||
|
if len(out) != 1 {
|
||||||
|
t.Fatalf("expected 1 chunk, got %d", len(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
got := gjson.GetBytes(out[0], "candidates.0.content.parts.0.inlineData.data").String()
|
||||||
|
if got != "aGVsbG8=" {
|
||||||
|
t.Fatalf("expected inlineData.data %q, got %q; chunk=%s", "aGVsbG8=", got, string(out[0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
gotMime := gjson.GetBytes(out[0], "candidates.0.content.parts.0.inlineData.mimeType").String()
|
||||||
|
if gotMime != "image/png" {
|
||||||
|
t.Fatalf("expected inlineData.mimeType %q, got %q; chunk=%s", "image/png", gotMime, string(out[0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
out = ConvertCodexResponseToGemini(ctx, "gemini-2.5-pro", originalRequest, nil, chunk, ¶m)
|
||||||
|
if len(out) != 0 {
|
||||||
|
t.Fatalf("expected duplicate image chunk to be suppressed, got %d", len(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConvertCodexResponseToGemini_StreamImageGenerationCallDoneEmitsInlineData(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
originalRequest := []byte(`{"tools":[]}`)
|
||||||
|
var param any
|
||||||
|
|
||||||
|
out := ConvertCodexResponseToGemini(ctx, "gemini-2.5-pro", originalRequest, nil, []byte(`data: {"type":"response.image_generation_call.partial_image","item_id":"ig_123","output_format":"png","partial_image_b64":"aGVsbG8=","partial_image_index":0}`), ¶m)
|
||||||
|
if len(out) != 1 {
|
||||||
|
t.Fatalf("expected 1 chunk, got %d", len(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
out = ConvertCodexResponseToGemini(ctx, "gemini-2.5-pro", originalRequest, nil, []byte(`data: {"type":"response.output_item.done","item":{"id":"ig_123","type":"image_generation_call","output_format":"png","result":"aGVsbG8="}}`), ¶m)
|
||||||
|
if len(out) != 0 {
|
||||||
|
t.Fatalf("expected output_item.done to be suppressed when identical to last partial image, got %d", len(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
out = ConvertCodexResponseToGemini(ctx, "gemini-2.5-pro", originalRequest, nil, []byte(`data: {"type":"response.output_item.done","item":{"id":"ig_123","type":"image_generation_call","output_format":"jpeg","result":"Ymll"}}`), ¶m)
|
||||||
|
if len(out) != 1 {
|
||||||
|
t.Fatalf("expected 1 chunk, got %d", len(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
got := gjson.GetBytes(out[0], "candidates.0.content.parts.0.inlineData.data").String()
|
||||||
|
if got != "Ymll" {
|
||||||
|
t.Fatalf("expected inlineData.data %q, got %q; chunk=%s", "Ymll", got, string(out[0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
gotMime := gjson.GetBytes(out[0], "candidates.0.content.parts.0.inlineData.mimeType").String()
|
||||||
|
if gotMime != "image/jpeg" {
|
||||||
|
t.Fatalf("expected inlineData.mimeType %q, got %q; chunk=%s", "image/jpeg", gotMime, string(out[0]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConvertCodexResponseToGemini_NonStreamImageGenerationCallAddsInlineDataPart(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
originalRequest := []byte(`{"tools":[]}`)
|
||||||
|
|
||||||
|
raw := []byte(`{"type":"response.completed","response":{"id":"resp_123","created_at":1700000000,"usage":{"input_tokens":1,"output_tokens":1},"output":[{"type":"message","content":[{"type":"output_text","text":"ok"}]},{"type":"image_generation_call","output_format":"png","result":"aGVsbG8="}]}}`)
|
||||||
|
out := ConvertCodexResponseToGeminiNonStream(ctx, "gemini-2.5-pro", originalRequest, nil, raw, nil)
|
||||||
|
|
||||||
|
got := gjson.GetBytes(out, "candidates.0.content.parts.1.inlineData.data").String()
|
||||||
|
if got != "aGVsbG8=" {
|
||||||
|
t.Fatalf("expected inlineData.data %q, got %q; chunk=%s", "aGVsbG8=", got, string(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
gotMime := gjson.GetBytes(out, "candidates.0.content.parts.1.inlineData.mimeType").String()
|
||||||
|
if gotMime != "image/png" {
|
||||||
|
t.Fatalf("expected inlineData.mimeType %q, got %q; chunk=%s", "image/png", gotMime, string(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ package chat_completions
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
@@ -26,6 +28,7 @@ type ConvertCliToOpenAIParams struct {
|
|||||||
FunctionCallIndex int
|
FunctionCallIndex int
|
||||||
HasReceivedArgumentsDelta bool
|
HasReceivedArgumentsDelta bool
|
||||||
HasToolCallAnnounced bool
|
HasToolCallAnnounced bool
|
||||||
|
LastImageHashByItemID map[string][32]byte
|
||||||
}
|
}
|
||||||
|
|
||||||
// ConvertCodexResponseToOpenAI translates a single chunk of a streaming response from the
|
// ConvertCodexResponseToOpenAI translates a single chunk of a streaming response from the
|
||||||
@@ -51,6 +54,7 @@ func ConvertCodexResponseToOpenAI(_ context.Context, modelName string, originalR
|
|||||||
FunctionCallIndex: -1,
|
FunctionCallIndex: -1,
|
||||||
HasReceivedArgumentsDelta: false,
|
HasReceivedArgumentsDelta: false,
|
||||||
HasToolCallAnnounced: false,
|
HasToolCallAnnounced: false,
|
||||||
|
LastImageHashByItemID: make(map[string][32]byte),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -70,6 +74,9 @@ func ConvertCodexResponseToOpenAI(_ context.Context, modelName string, originalR
|
|||||||
(*param).(*ConvertCliToOpenAIParams).ResponseID = rootResult.Get("response.id").String()
|
(*param).(*ConvertCliToOpenAIParams).ResponseID = rootResult.Get("response.id").String()
|
||||||
(*param).(*ConvertCliToOpenAIParams).CreatedAt = rootResult.Get("response.created_at").Int()
|
(*param).(*ConvertCliToOpenAIParams).CreatedAt = rootResult.Get("response.created_at").Int()
|
||||||
(*param).(*ConvertCliToOpenAIParams).Model = rootResult.Get("response.model").String()
|
(*param).(*ConvertCliToOpenAIParams).Model = rootResult.Get("response.model").String()
|
||||||
|
if (*param).(*ConvertCliToOpenAIParams).LastImageHashByItemID == nil {
|
||||||
|
(*param).(*ConvertCliToOpenAIParams).LastImageHashByItemID = make(map[string][32]byte)
|
||||||
|
}
|
||||||
return [][]byte{}
|
return [][]byte{}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -120,6 +127,39 @@ func ConvertCodexResponseToOpenAI(_ context.Context, modelName string, originalR
|
|||||||
template, _ = sjson.SetBytes(template, "choices.0.delta.role", "assistant")
|
template, _ = sjson.SetBytes(template, "choices.0.delta.role", "assistant")
|
||||||
template, _ = sjson.SetBytes(template, "choices.0.delta.content", deltaResult.String())
|
template, _ = sjson.SetBytes(template, "choices.0.delta.content", deltaResult.String())
|
||||||
}
|
}
|
||||||
|
} else if dataType == "response.image_generation_call.partial_image" {
|
||||||
|
itemID := rootResult.Get("item_id").String()
|
||||||
|
b64 := rootResult.Get("partial_image_b64").String()
|
||||||
|
if b64 == "" {
|
||||||
|
return [][]byte{}
|
||||||
|
}
|
||||||
|
if itemID != "" {
|
||||||
|
p := (*param).(*ConvertCliToOpenAIParams)
|
||||||
|
if p.LastImageHashByItemID == nil {
|
||||||
|
p.LastImageHashByItemID = make(map[string][32]byte)
|
||||||
|
}
|
||||||
|
hash := sha256.Sum256([]byte(b64))
|
||||||
|
if last, ok := p.LastImageHashByItemID[itemID]; ok && last == hash {
|
||||||
|
return [][]byte{}
|
||||||
|
}
|
||||||
|
p.LastImageHashByItemID[itemID] = hash
|
||||||
|
}
|
||||||
|
|
||||||
|
outputFormat := rootResult.Get("output_format").String()
|
||||||
|
mimeType := mimeTypeFromCodexOutputFormat(outputFormat)
|
||||||
|
imageURL := "data:" + mimeType + ";base64," + b64
|
||||||
|
|
||||||
|
imagesResult := gjson.GetBytes(template, "choices.0.delta.images")
|
||||||
|
if !imagesResult.Exists() || !imagesResult.IsArray() {
|
||||||
|
template, _ = sjson.SetRawBytes(template, "choices.0.delta.images", []byte(`[]`))
|
||||||
|
}
|
||||||
|
imageIndex := len(gjson.GetBytes(template, "choices.0.delta.images").Array())
|
||||||
|
imagePayload := []byte(`{"type":"image_url","image_url":{"url":""}}`)
|
||||||
|
imagePayload, _ = sjson.SetBytes(imagePayload, "index", imageIndex)
|
||||||
|
imagePayload, _ = sjson.SetBytes(imagePayload, "image_url.url", imageURL)
|
||||||
|
|
||||||
|
template, _ = sjson.SetBytes(template, "choices.0.delta.role", "assistant")
|
||||||
|
template, _ = sjson.SetRawBytes(template, "choices.0.delta.images.-1", imagePayload)
|
||||||
} else if dataType == "response.completed" {
|
} else if dataType == "response.completed" {
|
||||||
finishReason := "stop"
|
finishReason := "stop"
|
||||||
if (*param).(*ConvertCliToOpenAIParams).FunctionCallIndex != -1 {
|
if (*param).(*ConvertCliToOpenAIParams).FunctionCallIndex != -1 {
|
||||||
@@ -183,7 +223,46 @@ func ConvertCodexResponseToOpenAI(_ context.Context, modelName string, originalR
|
|||||||
|
|
||||||
} else if dataType == "response.output_item.done" {
|
} else if dataType == "response.output_item.done" {
|
||||||
itemResult := rootResult.Get("item")
|
itemResult := rootResult.Get("item")
|
||||||
if !itemResult.Exists() || itemResult.Get("type").String() != "function_call" {
|
if !itemResult.Exists() {
|
||||||
|
return [][]byte{}
|
||||||
|
}
|
||||||
|
itemType := itemResult.Get("type").String()
|
||||||
|
if itemType == "image_generation_call" {
|
||||||
|
itemID := itemResult.Get("id").String()
|
||||||
|
b64 := itemResult.Get("result").String()
|
||||||
|
if b64 == "" {
|
||||||
|
return [][]byte{}
|
||||||
|
}
|
||||||
|
if itemID != "" {
|
||||||
|
p := (*param).(*ConvertCliToOpenAIParams)
|
||||||
|
if p.LastImageHashByItemID == nil {
|
||||||
|
p.LastImageHashByItemID = make(map[string][32]byte)
|
||||||
|
}
|
||||||
|
hash := sha256.Sum256([]byte(b64))
|
||||||
|
if last, ok := p.LastImageHashByItemID[itemID]; ok && last == hash {
|
||||||
|
return [][]byte{}
|
||||||
|
}
|
||||||
|
p.LastImageHashByItemID[itemID] = hash
|
||||||
|
}
|
||||||
|
|
||||||
|
outputFormat := itemResult.Get("output_format").String()
|
||||||
|
mimeType := mimeTypeFromCodexOutputFormat(outputFormat)
|
||||||
|
imageURL := "data:" + mimeType + ";base64," + b64
|
||||||
|
|
||||||
|
imagesResult := gjson.GetBytes(template, "choices.0.delta.images")
|
||||||
|
if !imagesResult.Exists() || !imagesResult.IsArray() {
|
||||||
|
template, _ = sjson.SetRawBytes(template, "choices.0.delta.images", []byte(`[]`))
|
||||||
|
}
|
||||||
|
imageIndex := len(gjson.GetBytes(template, "choices.0.delta.images").Array())
|
||||||
|
imagePayload := []byte(`{"type":"image_url","image_url":{"url":""}}`)
|
||||||
|
imagePayload, _ = sjson.SetBytes(imagePayload, "index", imageIndex)
|
||||||
|
imagePayload, _ = sjson.SetBytes(imagePayload, "image_url.url", imageURL)
|
||||||
|
|
||||||
|
template, _ = sjson.SetBytes(template, "choices.0.delta.role", "assistant")
|
||||||
|
template, _ = sjson.SetRawBytes(template, "choices.0.delta.images.-1", imagePayload)
|
||||||
|
return [][]byte{template}
|
||||||
|
}
|
||||||
|
if itemType != "function_call" {
|
||||||
return [][]byte{}
|
return [][]byte{}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -285,6 +364,7 @@ func ConvertCodexResponseToOpenAINonStream(_ context.Context, _ string, original
|
|||||||
|
|
||||||
// Process the output array for content and function calls
|
// Process the output array for content and function calls
|
||||||
var toolCalls [][]byte
|
var toolCalls [][]byte
|
||||||
|
var images [][]byte
|
||||||
outputResult := responseResult.Get("output")
|
outputResult := responseResult.Get("output")
|
||||||
if outputResult.IsArray() {
|
if outputResult.IsArray() {
|
||||||
outputArray := outputResult.Array()
|
outputArray := outputResult.Array()
|
||||||
@@ -339,6 +419,19 @@ func ConvertCodexResponseToOpenAINonStream(_ context.Context, _ string, original
|
|||||||
}
|
}
|
||||||
|
|
||||||
toolCalls = append(toolCalls, functionCallTemplate)
|
toolCalls = append(toolCalls, functionCallTemplate)
|
||||||
|
case "image_generation_call":
|
||||||
|
b64 := outputItem.Get("result").String()
|
||||||
|
if b64 == "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
outputFormat := outputItem.Get("output_format").String()
|
||||||
|
mimeType := mimeTypeFromCodexOutputFormat(outputFormat)
|
||||||
|
imageURL := "data:" + mimeType + ";base64," + b64
|
||||||
|
|
||||||
|
imagePayload := []byte(`{"type":"image_url","image_url":{"url":""}}`)
|
||||||
|
imagePayload, _ = sjson.SetBytes(imagePayload, "index", len(images))
|
||||||
|
imagePayload, _ = sjson.SetBytes(imagePayload, "image_url.url", imageURL)
|
||||||
|
images = append(images, imagePayload)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -361,6 +454,15 @@ func ConvertCodexResponseToOpenAINonStream(_ context.Context, _ string, original
|
|||||||
}
|
}
|
||||||
template, _ = sjson.SetBytes(template, "choices.0.message.role", "assistant")
|
template, _ = sjson.SetBytes(template, "choices.0.message.role", "assistant")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add images if any
|
||||||
|
if len(images) > 0 {
|
||||||
|
template, _ = sjson.SetRawBytes(template, "choices.0.message.images", []byte(`[]`))
|
||||||
|
for _, image := range images {
|
||||||
|
template, _ = sjson.SetRawBytes(template, "choices.0.message.images.-1", image)
|
||||||
|
}
|
||||||
|
template, _ = sjson.SetBytes(template, "choices.0.message.role", "assistant")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract and set the finish reason based on status
|
// Extract and set the finish reason based on status
|
||||||
@@ -409,3 +511,24 @@ func buildReverseMapFromOriginalOpenAI(original []byte) map[string]string {
|
|||||||
}
|
}
|
||||||
return rev
|
return rev
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// mimeTypeFromCodexOutputFormat maps a Codex image `output_format` value to the
// MIME type used when building a `data:<mime>;base64,...` image URL.
//
// Rules, in order:
//   - empty (or whitespace-only) input yields "image/png";
//   - a value that already contains "/" is treated as a MIME type and returned
//     with surrounding whitespace removed;
//   - the short names png, jpg/jpeg, webp and gif (case-insensitive) map to the
//     corresponding image/* type;
//   - anything else falls back to "image/png".
func mimeTypeFromCodexOutputFormat(outputFormat string) string {
	// Trim first so values such as " jpeg " are not misclassified as unknown
	// and silently reported as PNG; stray whitespace would also corrupt the
	// data URL this value is spliced into.
	format := strings.TrimSpace(outputFormat)
	if format == "" {
		return "image/png"
	}
	if strings.Contains(format, "/") {
		return format
	}
	switch strings.ToLower(format) {
	case "png":
		return "image/png"
	case "jpg", "jpeg":
		return "image/jpeg"
	case "webp":
		return "image/webp"
	case "gif":
		return "image/gif"
	default:
		return "image/png"
	}
}
|
||||||
|
|||||||
@@ -90,3 +90,62 @@ func TestConvertCodexResponseToOpenAI_ToolCallArgumentsDeltaOmitsNullContentFiel
|
|||||||
t.Fatalf("expected tool call arguments delta to exist, got %s", string(out[0]))
|
t.Fatalf("expected tool call arguments delta to exist, got %s", string(out[0]))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestConvertCodexResponseToOpenAI_StreamPartialImageEmitsDeltaImages(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
var param any
|
||||||
|
|
||||||
|
chunk := []byte(`data: {"type":"response.image_generation_call.partial_image","item_id":"ig_123","output_format":"png","partial_image_b64":"aGVsbG8=","partial_image_index":0}`)
|
||||||
|
|
||||||
|
out := ConvertCodexResponseToOpenAI(ctx, "gpt-5.4", nil, nil, chunk, ¶m)
|
||||||
|
if len(out) != 1 {
|
||||||
|
t.Fatalf("expected 1 chunk, got %d", len(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
gotURL := gjson.GetBytes(out[0], "choices.0.delta.images.0.image_url.url").String()
|
||||||
|
if gotURL != "data:image/png;base64,aGVsbG8=" {
|
||||||
|
t.Fatalf("expected image url %q, got %q; chunk=%s", "data:image/png;base64,aGVsbG8=", gotURL, string(out[0]))
|
||||||
|
}
|
||||||
|
|
||||||
|
out = ConvertCodexResponseToOpenAI(ctx, "gpt-5.4", nil, nil, chunk, ¶m)
|
||||||
|
if len(out) != 0 {
|
||||||
|
t.Fatalf("expected duplicate image chunk to be suppressed, got %d", len(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConvertCodexResponseToOpenAI_StreamImageGenerationCallDoneEmitsDeltaImages(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
var param any
|
||||||
|
|
||||||
|
out := ConvertCodexResponseToOpenAI(ctx, "gpt-5.4", nil, nil, []byte(`data: {"type":"response.image_generation_call.partial_image","item_id":"ig_123","output_format":"png","partial_image_b64":"aGVsbG8=","partial_image_index":0}`), ¶m)
|
||||||
|
if len(out) != 1 {
|
||||||
|
t.Fatalf("expected 1 chunk, got %d", len(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
out = ConvertCodexResponseToOpenAI(ctx, "gpt-5.4", nil, nil, []byte(`data: {"type":"response.output_item.done","item":{"id":"ig_123","type":"image_generation_call","output_format":"png","result":"aGVsbG8="}}`), ¶m)
|
||||||
|
if len(out) != 0 {
|
||||||
|
t.Fatalf("expected output_item.done to be suppressed when identical to last partial image, got %d", len(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
out = ConvertCodexResponseToOpenAI(ctx, "gpt-5.4", nil, nil, []byte(`data: {"type":"response.output_item.done","item":{"id":"ig_123","type":"image_generation_call","output_format":"jpeg","result":"Ymll"}}`), ¶m)
|
||||||
|
if len(out) != 1 {
|
||||||
|
t.Fatalf("expected 1 chunk, got %d", len(out))
|
||||||
|
}
|
||||||
|
|
||||||
|
gotURL := gjson.GetBytes(out[0], "choices.0.delta.images.0.image_url.url").String()
|
||||||
|
if gotURL != "data:image/jpeg;base64,Ymll" {
|
||||||
|
t.Fatalf("expected image url %q, got %q; chunk=%s", "data:image/jpeg;base64,Ymll", gotURL, string(out[0]))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestConvertCodexResponseToOpenAI_NonStreamImageGenerationCallAddsMessageImages(t *testing.T) {
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
raw := []byte(`{"type":"response.completed","response":{"id":"resp_123","created_at":1700000000,"model":"gpt-5.4","status":"completed","usage":{"input_tokens":1,"output_tokens":1,"total_tokens":2},"output":[{"type":"message","content":[{"type":"output_text","text":"ok"}]},{"type":"image_generation_call","output_format":"png","result":"aGVsbG8="}]}}`)
|
||||||
|
out := ConvertCodexResponseToOpenAINonStream(ctx, "gpt-5.4", nil, nil, raw, nil)
|
||||||
|
|
||||||
|
gotURL := gjson.GetBytes(out, "choices.0.message.images.0.image_url.url").String()
|
||||||
|
if gotURL != "data:image/png;base64,aGVsbG8=" {
|
||||||
|
t.Fatalf("expected image url %q, got %q; chunk=%s", "data:image/png;base64,aGVsbG8=", gotURL, string(out))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user