Commit 19bd598

fix: add thinking parameter for OpenAI-compatible APIs when supportsReasoningBinary is true
Fixes #11001

When using OpenAI-compatible APIs such as the volcengine Ark API with reasoning effort enabled, the API requires both:

1. the reasoning_effort parameter (e.g., "medium"), and
2. the thinking parameter set to { type: "enabled" }

This change adds the thinking parameter when:

- reasoning effort is being used (the reasoning object is present), AND
- the model's supportsReasoningBinary flag is true

The fix is applied to both:

- the createMessage method (for regular models with streaming)
- the handleO3FamilyMessage method (for O3 family models)

Users can enable this by setting supportsReasoningBinary: true in their custom model info configuration.
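For illustration, here is a minimal sketch of a handler configuration that opts into this behavior. The option shape mirrors the tests added in this commit; the endpoint, key, and model ID are placeholders, and the openAiBaseUrl/openAiApiKey field names are assumptions rather than taken from this diff:

import { OpenAiHandler } from "./src/api/providers/openai"

const handler = new OpenAiHandler({
	openAiBaseUrl: "https://ark.example.com/api/v3", // placeholder Ark-style endpoint (assumed field name)
	openAiApiKey: "YOUR_API_KEY", // placeholder (assumed field name)
	openAiModelId: "my-reasoning-model", // placeholder
	enableReasoningEffort: true,
	openAiCustomModelInfo: {
		contextWindow: 128_000,
		supportsPromptCache: false,
		supportsReasoningEffort: true,
		// Opt-in flag from this commit: with reasoning effort active, requests
		// also carry thinking: { type: "enabled" }
		supportsReasoningBinary: true,
		reasoningEffort: "medium",
	},
})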
1 parent d748de6 commit 19bd598

File tree

2 files changed: +155, -0 lines changed


src/api/providers/__tests__/openai.spec.ts

Lines changed: 145 additions & 0 deletions
@@ -374,6 +374,54 @@ describe("OpenAiHandler", () => {
 			expect(callArgs.reasoning_effort).toBe("high")
 		})
 
+		it("should include thinking parameter when supportsReasoningBinary is true and reasoning effort is enabled", async () => {
+			const reasoningBinaryOptions: ApiHandlerOptions = {
+				...mockOptions,
+				enableReasoningEffort: true,
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					supportsPromptCache: false,
+					supportsReasoningEffort: true,
+					supportsReasoningBinary: true,
+					reasoningEffort: "medium",
+				},
+			}
+			const reasoningBinaryHandler = new OpenAiHandler(reasoningBinaryOptions)
+			const stream = reasoningBinaryHandler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with both reasoning_effort and thinking
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.reasoning_effort).toBe("medium")
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+		})
+
+		it("should not include thinking parameter when supportsReasoningBinary is false even with reasoning effort enabled", async () => {
+			const noReasoningBinaryOptions: ApiHandlerOptions = {
+				...mockOptions,
+				enableReasoningEffort: true,
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					supportsPromptCache: false,
+					supportsReasoningEffort: true,
+					supportsReasoningBinary: false,
+					reasoningEffort: "high",
+				},
+			}
+			const noReasoningBinaryHandler = new OpenAiHandler(noReasoningBinaryOptions)
+			const stream = noReasoningBinaryHandler.createMessage(systemPrompt, messages)
+			// Consume the stream to trigger the API call
+			for await (const _chunk of stream) {
+			}
+			// Assert the mockCreate was called with reasoning_effort but NOT thinking
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.reasoning_effort).toBe("high")
+			expect(callArgs.thinking).toBeUndefined()
+		})
+
 		it("should not include reasoning_effort when reasoning effort is disabled", async () => {
 			const noReasoningOptions: ApiHandlerOptions = {
 				...mockOptions,
@@ -1138,6 +1186,103 @@ describe("OpenAiHandler", () => {
 				{ path: "/models/chat/completions" },
 			)
 		})
+
+		it("should include thinking parameter for O3 model when supportsReasoningBinary is true", async () => {
+			const o3ReasoningBinaryHandler = new OpenAiHandler({
+				...mockOptions,
+				openAiModelId: "o3-mini",
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 65536,
+					supportsPromptCache: false,
+					reasoningEffort: "medium" as "low" | "medium" | "high",
+					supportsReasoningBinary: true,
+				},
+			})
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello!",
+				},
+			]
+
+			const stream = o3ReasoningBinaryHandler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.reasoning_effort).toBe("medium")
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+		})
+
+		it("should not include thinking parameter for O3 model when supportsReasoningBinary is false", async () => {
+			const o3NoReasoningBinaryHandler = new OpenAiHandler({
+				...mockOptions,
+				openAiModelId: "o3-mini",
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 65536,
+					supportsPromptCache: false,
+					reasoningEffort: "high" as "low" | "medium" | "high",
+					supportsReasoningBinary: false,
+				},
+			})
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello!",
+				},
+			]
+
+			const stream = o3NoReasoningBinaryHandler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.reasoning_effort).toBe("high")
+			expect(callArgs.thinking).toBeUndefined()
+		})
+
+		it("should include thinking parameter for O3 model in non-streaming mode when supportsReasoningBinary is true", async () => {
+			const o3NonStreamingHandler = new OpenAiHandler({
+				...mockOptions,
+				openAiModelId: "o3-mini",
+				openAiStreamingEnabled: false,
+				openAiCustomModelInfo: {
+					contextWindow: 128_000,
+					maxTokens: 65536,
+					supportsPromptCache: false,
+					reasoningEffort: "low" as "low" | "medium" | "high",
+					supportsReasoningBinary: true,
+				},
+			})
+			const systemPrompt = "You are a helpful assistant."
+			const messages: Anthropic.Messages.MessageParam[] = [
+				{
+					role: "user",
+					content: "Hello!",
+				},
+			]
+
+			const stream = o3NonStreamingHandler.createMessage(systemPrompt, messages)
+			const chunks: any[] = []
+			for await (const chunk of stream) {
+				chunks.push(chunk)
+			}
+
+			expect(mockCreate).toHaveBeenCalled()
+			const callArgs = mockCreate.mock.calls[0][0]
+			expect(callArgs.reasoning_effort).toBe("low")
+			expect(callArgs.thinking).toEqual({ type: "enabled" })
+		})
 	})
 })

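Aside: the assertions above read mockCreate.mock.calls[0][0], i.e. the first argument of the first call made through the mocked OpenAI client. A minimal sketch of that mocking pattern with vitest follows; the spec's real setup is more elaborate, and the mocked module shape here is an assumption:

import { vi } from "vitest"

// vi.mock factories are hoisted, so a mock shared with the factory must be
// created via vi.hoisted.
const { mockCreate } = vi.hoisted(() => ({ mockCreate: vi.fn() }))

vi.mock("openai", () => ({
	default: class MockOpenAI {
		chat = { completions: { create: mockCreate } }
	},
}))

// Resolve to an empty async-iterable "stream" so the tests' `for await`
// loops complete immediately; only the call arguments are inspected.
mockCreate.mockResolvedValue({
	[Symbol.asyncIterator]: async function* () {},
})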
src/api/providers/openai.ts

Lines changed: 10 additions & 0 deletions
@@ -159,6 +159,8 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			stream: true as const,
 			...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
 			...(reasoning && reasoning),
+			// Add thinking parameter for OpenAI-compatible APIs that require it when using reasoning effort
+			...(reasoning && modelInfo.supportsReasoningBinary ? { thinking: { type: "enabled" } } : {}),
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
 			parallel_tool_calls: metadata?.parallelToolCalls ?? false,
@@ -344,6 +346,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 			stream: true,
 			...(isGrokXAI ? {} : { stream_options: { include_usage: true } }),
 			reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined,
+			// Add thinking parameter for OpenAI-compatible APIs that require it when using reasoning effort
+			...(modelInfo.reasoningEffort && modelInfo.supportsReasoningBinary
+				? { thinking: { type: "enabled" } }
+				: {}),
 			temperature: undefined,
 			// Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS)
 			tools: this.convertToolsForOpenAI(metadata?.tools),
@@ -378,6 +384,10 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl
 				...convertToOpenAiMessages(messages),
 			],
 			reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined,
+			// Add thinking parameter for OpenAI-compatible APIs that require it when using reasoning effort
+			...(modelInfo.reasoningEffort && modelInfo.supportsReasoningBinary
+				? { thinking: { type: "enabled" } }
+				: {}),
 			temperature: undefined,
 			// Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS)
 			tools: this.convertToolsForOpenAI(metadata?.tools),

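All three insertions rely on the same conditional-spread idiom: the thinking key is present in the request body only when both conditions hold, and is omitted entirely (not merely set to undefined) otherwise. A standalone sketch of the idiom, with illustrative names rather than the handler's actual signature:

type ReasoningEffort = "low" | "medium" | "high"

// Illustrative builder, not the handler's real API.
function buildRequestParams(reasoningEffort: ReasoningEffort | undefined, supportsReasoningBinary: boolean) {
	return {
		reasoning_effort: reasoningEffort,
		// Spreading {} adds nothing, so the key never appears when the guard is false.
		...(reasoningEffort && supportsReasoningBinary ? { thinking: { type: "enabled" as const } } : {}),
	}
}

buildRequestParams("medium", true) // => { reasoning_effort: "medium", thinking: { type: "enabled" } }
buildRequestParams("high", false) // => { reasoning_effort: "high" }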