{"version":3,"file":"openai-completions.d.ts","sourceRoot":"","sources":["../../src/providers/openai-completions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAOX,0BAA0B,EAG1B,MAAM,sCAAsC,CAAC;AAG9C,OAAO,KAAK,EAGX,OAAO,EAGP,KAAK,EACL,uBAAuB,EACvB,mBAAmB,EAEnB,cAAc,EACd,aAAa,EAMb,MAAM,aAAa,CAAC;AA6CrB,MAAM,WAAW,wBAAyB,SAAQ,aAAa;IAC9D,UAAU,CAAC,EAAE,MAAM,GAAG,MAAM,GAAG,UAAU,GAAG;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,QAAQ,EAAE;YAAE,IAAI,EAAE,MAAM,CAAA;SAAE,CAAA;KAAE,CAAC;IAC7F,eAAe,CAAC,EAAE,SAAS,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,GAAG,OAAO,CAAC;CAClE;AAOD,KAAK,+BAA+B,GAAG,IAAI,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,oBAAoB,CAAC,GAAG;IACtG,kBAAkB,CAAC,EAAE,uBAAuB,CAAC,oBAAoB,CAAC,CAAC;CACnE,CAAC;AAsBF,eAAO,MAAM,uBAAuB,EAAE,cAAc,CAAC,oBAAoB,EAAE,wBAAwB,CA+SlG,CAAC;AAEF,eAAO,MAAM,6BAA6B,EAAE,cAAc,CAAC,oBAAoB,EAAE,mBAAmB,CAoBnG,CAAC;AA8RF,wBAAgB,eAAe,CAC9B,KAAK,EAAE,KAAK,CAAC,oBAAoB,CAAC,EAClC,OAAO,EAAE,OAAO,EAChB,MAAM,EAAE,+BAA+B,GACrC,0BAA0B,EAAE,CA6O9B","sourcesContent":["import OpenAI from \"openai\";\nimport type {\n\tChatCompletionAssistantMessageParam,\n\tChatCompletionChunk,\n\tChatCompletionContentPart,\n\tChatCompletionContentPartImage,\n\tChatCompletionContentPartText,\n\tChatCompletionDeveloperMessageParam,\n\tChatCompletionMessageParam,\n\tChatCompletionSystemMessageParam,\n\tChatCompletionToolMessageParam,\n} from \"openai/resources/chat/completions.js\";\nimport { getEnvApiKey } from \"../env-api-keys.js\";\nimport { calculateCost, clampThinkingLevel } from \"../models.js\";\nimport type {\n\tAssistantMessage,\n\tCacheRetention,\n\tContext,\n\tImageContent,\n\tMessage,\n\tModel,\n\tOpenAICompletionsCompat,\n\tSimpleStreamOptions,\n\tStopReason,\n\tStreamFunction,\n\tStreamOptions,\n\tTextContent,\n\tThinkingContent,\n\tTool,\n\tToolCall,\n\tToolResultMessage,\n} from \"../types.js\";\nimport { AssistantMessageEventStream } from \"../utils/event-stream.js\";\nimport { headersToRecord } from \"../utils/headers.js\";\nimport { parseStreamingJson } from \"../utils/json-parse.js\";\nimport { sanitizeSurrogates } from \"../utils/sanitize-unicode.js\";\nimport { isCloudflareProvider, resolveCloudflareBaseUrl } from \"./cloudflare.js\";\nimport { buildCopilotDynamicHeaders, hasCopilotVisionInput } from \"./github-copilot-headers.js\";\nimport { buildBaseOptions } from \"./simple-options.js\";\nimport { transformMessages } from \"./transform-messages.js\";\n\n/**\n * Check if conversation messages contain tool calls or tool results.\n * This is needed because Anthropic (via proxy) requires the tools param\n * to be present when messages include tool_calls or tool role messages.\n */\nfunction hasToolHistory(messages: Message[]): boolean {\n\tfor (const msg of messages) {\n\t\tif (msg.role === \"toolResult\") {\n\t\t\treturn true;\n\t\t}\n\t\tif (msg.role === \"assistant\") {\n\t\t\tif (msg.content.some((block) => block.type === \"toolCall\")) {\n\t\t\t\treturn true;\n\t\t\t}\n\t\t}\n\t}\n\treturn false;\n}\n\nfunction isTextContentBlock(block: { type: string }): block is TextContent {\n\treturn block.type === \"text\";\n}\n\nfunction isThinkingContentBlock(block: { type: string }): block is ThinkingContent {\n\treturn block.type === \"thinking\";\n}\n\nfunction isToolCallBlock(block: { type: string }): block is ToolCall {\n\treturn block.type === \"toolCall\";\n}\n\nfunction isImageContentBlock(block: { type: string }): block is ImageContent {\n\treturn block.type === \"image\";\n}\n\nexport interface OpenAICompletionsOptions extends StreamOptions {\n\ttoolChoice?: \"auto\" | \"none\" | \"required\" | { type: \"function\"; function: { name: string } };\n\treasoningEffort?: \"minimal\" | \"low\" | \"medium\" | \"high\" | \"xhigh\";\n}\n\ninterface OpenAICompatCacheControl {\n\ttype: \"ephemeral\";\n\tttl?: string;\n}\n\ntype ResolvedOpenAICompletionsCompat = Omit<Required<OpenAICompletionsCompat>, \"cacheControlFormat\"> & {\n\tcacheControlFormat?: OpenAICompletionsCompat[\"cacheControlFormat\"];\n};\n\ntype ChatCompletionInstructionMessageParam = ChatCompletionDeveloperMessageParam | ChatCompletionSystemMessageParam;\n\ntype ChatCompletionTextPartWithCacheControl = ChatCompletionContentPartText & {\n\tcache_control?: OpenAICompatCacheControl;\n};\n\ntype ChatCompletionToolWithCacheControl = OpenAI.Chat.Completions.ChatCompletionTool & {\n\tcache_control?: OpenAICompatCacheControl;\n};\n\nfunction resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention {\n\tif (cacheRetention) {\n\t\treturn cacheRetention;\n\t}\n\tif (typeof process !== \"undefined\" && process.env.PI_CACHE_RETENTION === \"long\") {\n\t\treturn \"long\";\n\t}\n\treturn \"short\";\n}\n\nexport const streamOpenAICompletions: StreamFunction<\"openai-completions\", OpenAICompletionsOptions> = (\n\tmodel: Model<\"openai-completions\">,\n\tcontext: Context,\n\toptions?: OpenAICompletionsOptions,\n): AssistantMessageEventStream => {\n\tconst stream = new AssistantMessageEventStream();\n\n\t(async () => {\n\t\tconst output: AssistantMessage = {\n\t\t\trole: \"assistant\",\n\t\t\tcontent: [],\n\t\t\tapi: model.api,\n\t\t\tprovider: model.provider,\n\t\t\tmodel: model.id,\n\t\t\tusage: {\n\t\t\t\tinput: 0,\n\t\t\t\toutput: 0,\n\t\t\t\tcacheRead: 0,\n\t\t\t\tcacheWrite: 0,\n\t\t\t\ttotalTokens: 0,\n\t\t\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t\t\t},\n\t\t\tstopReason: \"stop\",\n\t\t\ttimestamp: Date.now(),\n\t\t};\n\n\t\ttry {\n\t\t\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider) || \"\";\n\t\t\tconst compat = getCompat(model);\n\t\t\tconst cacheRetention = resolveCacheRetention(options?.cacheRetention);\n\t\t\tconst cacheSessionId = cacheRetention === \"none\" ? undefined : options?.sessionId;\n\t\t\tconst client = createClient(model, context, apiKey, options?.headers, cacheSessionId, compat);\n\t\t\tlet params = buildParams(model, context, options, compat, cacheRetention);\n\t\t\tconst nextParams = await options?.onPayload?.(params, model);\n\t\t\tif (nextParams !== undefined) {\n\t\t\t\tparams = nextParams as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming;\n\t\t\t}\n\t\t\tconst requestOptions = {\n\t\t\t\t...(options?.signal ? { signal: options.signal } : {}),\n\t\t\t\t...(options?.timeoutMs !== undefined ? { timeout: options.timeoutMs } : {}),\n\t\t\t\t...(options?.maxRetries !== undefined ? { maxRetries: options.maxRetries } : {}),\n\t\t\t};\n\t\t\tconst { data: openaiStream, response } = await client.chat.completions\n\t\t\t\t.create(params, requestOptions)\n\t\t\t\t.withResponse();\n\t\t\tawait options?.onResponse?.({ status: response.status, headers: headersToRecord(response.headers) }, model);\n\t\t\tstream.push({ type: \"start\", partial: output });\n\n\t\t\tinterface StreamingToolCallBlock extends ToolCall {\n\t\t\t\tpartialArgs?: string;\n\t\t\t\tstreamIndex?: number;\n\t\t\t}\n\t\t\ttype StreamingBlock = TextContent | ThinkingContent | StreamingToolCallBlock;\n\t\t\ttype StreamingToolCallDelta = NonNullable<ChatCompletionChunk.Choice.Delta[\"tool_calls\"]>[number];\n\n\t\t\tlet textBlock: TextContent | null = null;\n\t\t\tlet thinkingBlock: ThinkingContent | null = null;\n\t\t\tconst toolCallBlocksByIndex = new Map<number, StreamingToolCallBlock>();\n\t\t\tconst toolCallBlocksById = new Map<string, StreamingToolCallBlock>();\n\t\t\tconst blocks = output.content as StreamingBlock[];\n\t\t\tconst getContentIndex = (block: StreamingBlock) => blocks.indexOf(block);\n\t\t\tconst finishBlock = (block: StreamingBlock) => {\n\t\t\t\tconst contentIndex = getContentIndex(block);\n\t\t\t\tif (contentIndex === -1) {\n\t\t\t\t\treturn;\n\t\t\t\t}\n\t\t\t\tif (block.type === \"text\") {\n\t\t\t\t\tstream.push({\n\t\t\t\t\t\ttype: \"text_end\",\n\t\t\t\t\t\tcontentIndex,\n\t\t\t\t\t\tcontent: block.text,\n\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t});\n\t\t\t\t} else if (block.type === \"thinking\") {\n\t\t\t\t\tstream.push({\n\t\t\t\t\t\ttype: \"thinking_end\",\n\t\t\t\t\t\tcontentIndex,\n\t\t\t\t\t\tcontent: block.thinking,\n\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t});\n\t\t\t\t} else if (block.type === \"toolCall\") {\n\t\t\t\t\tblock.arguments = parseStreamingJson(block.partialArgs);\n\t\t\t\t\t// Finalize in-place and strip the scratch buffers so replay only\n\t\t\t\t\t// carries parsed arguments.\n\t\t\t\t\tdelete block.partialArgs;\n\t\t\t\t\tdelete block.streamIndex;\n\t\t\t\t\tstream.push({\n\t\t\t\t\t\ttype: \"toolcall_end\",\n\t\t\t\t\t\tcontentIndex,\n\t\t\t\t\t\ttoolCall: block,\n\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t};\n\t\t\tconst ensureTextBlock = () => {\n\t\t\t\tif (!textBlock) {\n\t\t\t\t\ttextBlock = { type: \"text\", text: \"\" };\n\t\t\t\t\tblocks.push(textBlock);\n\t\t\t\t\tstream.push({ type: \"text_start\", contentIndex: getContentIndex(textBlock), partial: output });\n\t\t\t\t}\n\t\t\t\treturn textBlock;\n\t\t\t};\n\t\t\tconst ensureThinkingBlock = (thinkingSignature: string) => {\n\t\t\t\tif (!thinkingBlock) {\n\t\t\t\t\tthinkingBlock = {\n\t\t\t\t\t\ttype: \"thinking\",\n\t\t\t\t\t\tthinking: \"\",\n\t\t\t\t\t\tthinkingSignature,\n\t\t\t\t\t};\n\t\t\t\t\tblocks.push(thinkingBlock);\n\t\t\t\t\tstream.push({ type: \"thinking_start\", contentIndex: getContentIndex(thinkingBlock), partial: output });\n\t\t\t\t}\n\t\t\t\treturn thinkingBlock;\n\t\t\t};\n\t\t\tconst ensureToolCallBlock = (toolCall: StreamingToolCallDelta) => {\n\t\t\t\tconst streamIndex = typeof toolCall.index === \"number\" ? toolCall.index : undefined;\n\t\t\t\tlet block = streamIndex !== undefined ? toolCallBlocksByIndex.get(streamIndex) : undefined;\n\t\t\t\tif (!block && toolCall.id) {\n\t\t\t\t\tblock = toolCallBlocksById.get(toolCall.id);\n\t\t\t\t}\n\t\t\t\tif (!block) {\n\t\t\t\t\tblock = {\n\t\t\t\t\t\ttype: \"toolCall\",\n\t\t\t\t\t\tid: toolCall.id || \"\",\n\t\t\t\t\t\tname: toolCall.function?.name || \"\",\n\t\t\t\t\t\targuments: {},\n\t\t\t\t\t\tpartialArgs: \"\",\n\t\t\t\t\t\tstreamIndex,\n\t\t\t\t\t};\n\t\t\t\t\tif (streamIndex !== undefined) {\n\t\t\t\t\t\ttoolCallBlocksByIndex.set(streamIndex, block);\n\t\t\t\t\t}\n\t\t\t\t\tif (toolCall.id) {\n\t\t\t\t\t\ttoolCallBlocksById.set(toolCall.id, block);\n\t\t\t\t\t}\n\t\t\t\t\tblocks.push(block);\n\t\t\t\t\tstream.push({\n\t\t\t\t\t\ttype: \"toolcall_start\",\n\t\t\t\t\t\tcontentIndex: getContentIndex(block),\n\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t});\n\t\t\t\t}\n\t\t\t\tif (streamIndex !== undefined && block.streamIndex === undefined) {\n\t\t\t\t\tblock.streamIndex = streamIndex;\n\t\t\t\t\ttoolCallBlocksByIndex.set(streamIndex, block);\n\t\t\t\t}\n\t\t\t\tif (toolCall.id) {\n\t\t\t\t\ttoolCallBlocksById.set(toolCall.id, block);\n\t\t\t\t}\n\t\t\t\treturn block;\n\t\t\t};\n\n\t\t\tfor await (const chunk of openaiStream) {\n\t\t\t\tif (!chunk || typeof chunk !== \"object\") continue;\n\n\t\t\t\t// OpenAI documents ChatCompletionChunk.id as the unique chat completion identifier,\n\t\t\t\t// and each chunk in a streamed completion carries the same id.\n\t\t\t\toutput.responseId ||= chunk.id;\n\t\t\t\tif (typeof chunk.model === \"string\" && chunk.model.length > 0 && chunk.model !== model.id) {\n\t\t\t\t\toutput.responseModel ||= chunk.model;\n\t\t\t\t}\n\t\t\t\tif (chunk.usage) {\n\t\t\t\t\toutput.usage = parseChunkUsage(chunk.usage, model);\n\t\t\t\t}\n\n\t\t\t\tconst choice = Array.isArray(chunk.choices) ? chunk.choices[0] : undefined;\n\t\t\t\tif (!choice) continue;\n\n\t\t\t\t// Fallback: some providers (e.g., Moonshot) return usage\n\t\t\t\t// in choice.usage instead of the standard chunk.usage\n\t\t\t\tif (!chunk.usage && (choice as any).usage) {\n\t\t\t\t\toutput.usage = parseChunkUsage((choice as any).usage, model);\n\t\t\t\t}\n\n\t\t\t\tif (choice.finish_reason) {\n\t\t\t\t\tconst finishReasonResult = mapStopReason(choice.finish_reason);\n\t\t\t\t\toutput.stopReason = finishReasonResult.stopReason;\n\t\t\t\t\tif (finishReasonResult.errorMessage) {\n\t\t\t\t\t\toutput.errorMessage = finishReasonResult.errorMessage;\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\tif (choice.delta) {\n\t\t\t\t\tif (\n\t\t\t\t\t\tchoice.delta.content !== null &&\n\t\t\t\t\t\tchoice.delta.content !== undefined &&\n\t\t\t\t\t\tchoice.delta.content.length > 0\n\t\t\t\t\t) {\n\t\t\t\t\t\tconst block = ensureTextBlock();\n\t\t\t\t\t\tblock.text += choice.delta.content;\n\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\ttype: \"text_delta\",\n\t\t\t\t\t\t\tcontentIndex: getContentIndex(block),\n\t\t\t\t\t\t\tdelta: choice.delta.content,\n\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t});\n\t\t\t\t\t}\n\n\t\t\t\t\t// Some endpoints return reasoning in reasoning_content (llama.cpp),\n\t\t\t\t\t// or reasoning (other openai compatible endpoints)\n\t\t\t\t\t// Use the first non-empty reasoning field to avoid duplication\n\t\t\t\t\t// (e.g., chutes.ai returns both reasoning_content and reasoning with same content)\n\t\t\t\t\tconst reasoningFields = [\"reasoning_content\", \"reasoning\", \"reasoning_text\"];\n\t\t\t\t\tconst deltaFields = choice.delta as Record<string, unknown>;\n\t\t\t\t\tlet foundReasoningField: string | null = null;\n\t\t\t\t\tfor (const field of reasoningFields) {\n\t\t\t\t\t\tconst value = deltaFields[field];\n\t\t\t\t\t\tif (typeof value === \"string\" && value.length > 0) {\n\t\t\t\t\t\t\tfoundReasoningField = field;\n\t\t\t\t\t\t\tbreak;\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tif (foundReasoningField) {\n\t\t\t\t\t\tconst delta = deltaFields[foundReasoningField];\n\t\t\t\t\t\tif (typeof delta === \"string\" && delta.length > 0) {\n\t\t\t\t\t\t\tconst block = ensureThinkingBlock(foundReasoningField);\n\t\t\t\t\t\t\tblock.thinking += delta;\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"thinking_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: getContentIndex(block),\n\t\t\t\t\t\t\t\tdelta,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tif (choice?.delta?.tool_calls) {\n\t\t\t\t\t\tfor (const toolCall of choice.delta.tool_calls) {\n\t\t\t\t\t\t\tconst block = ensureToolCallBlock(toolCall);\n\t\t\t\t\t\t\tif (!block.id && toolCall.id) {\n\t\t\t\t\t\t\t\tblock.id = toolCall.id;\n\t\t\t\t\t\t\t\ttoolCallBlocksById.set(toolCall.id, block);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\tif (!block.name && toolCall.function?.name) {\n\t\t\t\t\t\t\t\tblock.name = toolCall.function.name;\n\t\t\t\t\t\t\t}\n\n\t\t\t\t\t\t\tlet delta = \"\";\n\t\t\t\t\t\t\tif (toolCall.function?.arguments) {\n\t\t\t\t\t\t\t\tdelta = toolCall.function.arguments;\n\t\t\t\t\t\t\t\tblock.partialArgs = (block.partialArgs ?? \"\") + toolCall.function.arguments;\n\t\t\t\t\t\t\t\tblock.arguments = parseStreamingJson(block.partialArgs);\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\tstream.push({\n\t\t\t\t\t\t\t\ttype: \"toolcall_delta\",\n\t\t\t\t\t\t\t\tcontentIndex: getContentIndex(block),\n\t\t\t\t\t\t\t\tdelta,\n\t\t\t\t\t\t\t\tpartial: output,\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\n\t\t\t\t\tconst reasoningDetails = (choice.delta as any).reasoning_details;\n\t\t\t\t\tif (reasoningDetails && Array.isArray(reasoningDetails)) {\n\t\t\t\t\t\tfor (const detail of reasoningDetails) {\n\t\t\t\t\t\t\tif (detail.type === \"reasoning.encrypted\" && detail.id && detail.data) {\n\t\t\t\t\t\t\t\tconst matchingToolCall = output.content.find(\n\t\t\t\t\t\t\t\t\t(b) => b.type === \"toolCall\" && b.id === detail.id,\n\t\t\t\t\t\t\t\t) as ToolCall | undefined;\n\t\t\t\t\t\t\t\tif (matchingToolCall) {\n\t\t\t\t\t\t\t\t\tmatchingToolCall.thoughtSignature = JSON.stringify(detail);\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tfor (const block of blocks) {\n\t\t\t\tfinishBlock(block);\n\t\t\t}\n\t\t\tif (options?.signal?.aborted) {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\n\t\t\tif (output.stopReason === \"aborted\") {\n\t\t\t\tthrow new Error(\"Request was aborted\");\n\t\t\t}\n\t\t\tif (output.stopReason === \"error\") {\n\t\t\t\tthrow new Error(output.errorMessage || \"Provider returned an error stop reason\");\n\t\t\t}\n\n\t\t\tstream.push({ type: \"done\", reason: output.stopReason, message: output });\n\t\t\tstream.end();\n\t\t} catch (error) {\n\t\t\tfor (const block of output.content) {\n\t\t\t\tdelete (block as { index?: number }).index;\n\t\t\t\t// Streaming scratch buffers are only used during parsing; never persist them.\n\t\t\t\tdelete (block as { partialArgs?: string }).partialArgs;\n\t\t\t\tdelete (block as { streamIndex?: number }).streamIndex;\n\t\t\t}\n\t\t\toutput.stopReason = options?.signal?.aborted ? \"aborted\" : \"error\";\n\t\t\toutput.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);\n\t\t\t// Some providers via OpenRouter give additional information in this field.\n\t\t\tconst rawMetadata = (error as any)?.error?.metadata?.raw;\n\t\t\tif (rawMetadata) output.errorMessage += `\\n${rawMetadata}`;\n\t\t\tstream.push({ type: \"error\", reason: output.stopReason, error: output });\n\t\t\tstream.end();\n\t\t}\n\t})();\n\n\treturn stream;\n};\n\nexport const streamSimpleOpenAICompletions: StreamFunction<\"openai-completions\", SimpleStreamOptions> = (\n\tmodel: Model<\"openai-completions\">,\n\tcontext: Context,\n\toptions?: SimpleStreamOptions,\n): AssistantMessageEventStream => {\n\tconst apiKey = options?.apiKey || getEnvApiKey(model.provider);\n\tif (!apiKey) {\n\t\tthrow new Error(`No API key for provider: ${model.provider}`);\n\t}\n\n\tconst base = buildBaseOptions(model, options, apiKey);\n\tconst clampedReasoning = options?.reasoning ? clampThinkingLevel(model, options.reasoning) : undefined;\n\tconst reasoningEffort = clampedReasoning === \"off\" ? undefined : clampedReasoning;\n\tconst toolChoice = (options as OpenAICompletionsOptions | undefined)?.toolChoice;\n\n\treturn streamOpenAICompletions(model, context, {\n\t\t...base,\n\t\treasoningEffort,\n\t\ttoolChoice,\n\t} satisfies OpenAICompletionsOptions);\n};\n\nfunction createClient(\n\tmodel: Model<\"openai-completions\">,\n\tcontext: Context,\n\tapiKey?: string,\n\toptionsHeaders?: Record<string, string>,\n\tsessionId?: string,\n\tcompat: ResolvedOpenAICompletionsCompat = getCompat(model),\n) {\n\tif (!apiKey) {\n\t\tif (!process.env.OPENAI_API_KEY) {\n\t\t\tthrow new Error(\n\t\t\t\t\"OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.\",\n\t\t\t);\n\t\t}\n\t\tapiKey = process.env.OPENAI_API_KEY;\n\t}\n\n\tconst headers = { ...model.headers };\n\tif (model.provider === \"github-copilot\") {\n\t\tconst hasImages = hasCopilotVisionInput(context.messages);\n\t\tconst copilotHeaders = buildCopilotDynamicHeaders({\n\t\t\tmessages: context.messages,\n\t\t\thasImages,\n\t\t});\n\t\tObject.assign(headers, copilotHeaders);\n\t}\n\n\tif (sessionId && compat.sendSessionAffinityHeaders) {\n\t\theaders.session_id = sessionId;\n\t\theaders[\"x-client-request-id\"] = sessionId;\n\t\theaders[\"x-session-affinity\"] = sessionId;\n\t}\n\n\t// Merge options headers last so they can override defaults\n\tif (optionsHeaders) {\n\t\tObject.assign(headers, optionsHeaders);\n\t}\n\n\tconst defaultHeaders =\n\t\tmodel.provider === \"cloudflare-ai-gateway\"\n\t\t\t? {\n\t\t\t\t\t...headers,\n\t\t\t\t\tAuthorization: headers.Authorization ?? null,\n\t\t\t\t\t\"cf-aig-authorization\": `Bearer ${apiKey}`,\n\t\t\t\t}\n\t\t\t: headers;\n\n\treturn new OpenAI({\n\t\tapiKey,\n\t\tbaseURL: isCloudflareProvider(model.provider) ? resolveCloudflareBaseUrl(model) : model.baseUrl,\n\t\tdangerouslyAllowBrowser: true,\n\t\tdefaultHeaders,\n\t});\n}\n\nfunction buildParams(\n\tmodel: Model<\"openai-completions\">,\n\tcontext: Context,\n\toptions?: OpenAICompletionsOptions,\n\tcompat: ResolvedOpenAICompletionsCompat = getCompat(model),\n\tcacheRetention: CacheRetention = resolveCacheRetention(options?.cacheRetention),\n) {\n\tconst messages = convertMessages(model, context, compat);\n\tconst cacheControl = getCompatCacheControl(compat, cacheRetention);\n\n\tconst params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {\n\t\tmodel: model.id,\n\t\tmessages,\n\t\tstream: true,\n\t\tprompt_cache_key:\n\t\t\t(model.baseUrl.includes(\"api.openai.com\") && cacheRetention !== \"none\") ||\n\t\t\t(cacheRetention === \"long\" && compat.supportsLongCacheRetention)\n\t\t\t\t? options?.sessionId\n\t\t\t\t: undefined,\n\t\tprompt_cache_retention: cacheRetention === \"long\" && compat.supportsLongCacheRetention ? \"24h\" : undefined,\n\t};\n\n\tif (compat.supportsUsageInStreaming !== false) {\n\t\t(params as any).stream_options = { include_usage: true };\n\t}\n\n\tif (compat.supportsStore) {\n\t\tparams.store = false;\n\t}\n\n\tif (options?.maxTokens) {\n\t\tif (compat.maxTokensField === \"max_tokens\") {\n\t\t\t(params as any).max_tokens = options.maxTokens;\n\t\t} else {\n\t\t\tparams.max_completion_tokens = options.maxTokens;\n\t\t}\n\t}\n\n\tif (options?.temperature !== undefined) {\n\t\tparams.temperature = options.temperature;\n\t}\n\n\tif (context.tools && context.tools.length > 0) {\n\t\tparams.tools = convertTools(context.tools, compat);\n\t\tif (compat.zaiToolStream) {\n\t\t\t(params as any).tool_stream = true;\n\t\t}\n\t} else if (hasToolHistory(context.messages)) {\n\t\t// Anthropic (via LiteLLM/proxy) requires tools param when conversation has tool_calls/tool_results\n\t\tparams.tools = [];\n\t}\n\n\tif (cacheControl) {\n\t\tapplyAnthropicCacheControl(messages, params.tools, cacheControl);\n\t}\n\n\tif (options?.toolChoice) {\n\t\tparams.tool_choice = options.toolChoice;\n\t}\n\n\tif (compat.thinkingFormat === \"zai\" && model.reasoning) {\n\t\t(params as any).enable_thinking = !!options?.reasoningEffort;\n\t} else if (compat.thinkingFormat === \"qwen\" && model.reasoning) {\n\t\t(params as any).enable_thinking = !!options?.reasoningEffort;\n\t} else if (compat.thinkingFormat === \"qwen-chat-template\" && model.reasoning) {\n\t\t(params as any).chat_template_kwargs = {\n\t\t\tenable_thinking: !!options?.reasoningEffort,\n\t\t\tpreserve_thinking: true,\n\t\t};\n\t} else if (compat.thinkingFormat === \"deepseek\" && model.reasoning) {\n\t\t(params as any).thinking = { type: options?.reasoningEffort ? \"enabled\" : \"disabled\" };\n\t\tif (options?.reasoningEffort) {\n\t\t\t(params as any).reasoning_effort =\n\t\t\t\tmodel.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;\n\t\t}\n\t} else if (compat.thinkingFormat === \"openrouter\" && model.reasoning) {\n\t\t// OpenRouter normalizes reasoning across providers via a nested reasoning object.\n\t\tconst openRouterParams = params as typeof params & { reasoning?: { effort?: string } };\n\t\tif (options?.reasoningEffort) {\n\t\t\topenRouterParams.reasoning = {\n\t\t\t\teffort: model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort,\n\t\t\t};\n\t\t} else if (model.thinkingLevelMap?.off !== null) {\n\t\t\topenRouterParams.reasoning = { effort: model.thinkingLevelMap?.off ?? \"none\" };\n\t\t}\n\t} else if (options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {\n\t\t// OpenAI-style reasoning_effort\n\t\t(params as any).reasoning_effort = model.thinkingLevelMap?.[options.reasoningEffort] ?? options.reasoningEffort;\n\t} else if (!options?.reasoningEffort && model.reasoning && compat.supportsReasoningEffort) {\n\t\tconst offValue = model.thinkingLevelMap?.off;\n\t\tif (typeof offValue === \"string\") {\n\t\t\t(params as any).reasoning_effort = offValue;\n\t\t}\n\t}\n\n\t// OpenRouter provider routing preferences\n\tif (model.baseUrl.includes(\"openrouter.ai\") && model.compat?.openRouterRouting) {\n\t\t(params as any).provider = model.compat.openRouterRouting;\n\t}\n\n\t// Vercel AI Gateway provider routing preferences\n\tif (model.baseUrl.includes(\"ai-gateway.vercel.sh\") && model.compat?.vercelGatewayRouting) {\n\t\tconst routing = model.compat.vercelGatewayRouting;\n\t\tif (routing.only || routing.order) {\n\t\t\tconst gatewayOptions: Record<string, string[]> = {};\n\t\t\tif (routing.only) gatewayOptions.only = routing.only;\n\t\t\tif (routing.order) gatewayOptions.order = routing.order;\n\t\t\t(params as any).providerOptions = { gateway: gatewayOptions };\n\t\t}\n\t}\n\n\treturn params;\n}\n\nfunction getCompatCacheControl(\n\tcompat: ResolvedOpenAICompletionsCompat,\n\tcacheRetention: CacheRetention,\n): OpenAICompatCacheControl | undefined {\n\tif (compat.cacheControlFormat !== \"anthropic\" || cacheRetention === \"none\") {\n\t\treturn undefined;\n\t}\n\n\tconst ttl = cacheRetention === \"long\" && compat.supportsLongCacheRetention ? \"1h\" : undefined;\n\treturn { type: \"ephemeral\", ...(ttl ? { ttl } : {}) };\n}\n\nfunction applyAnthropicCacheControl(\n\tmessages: ChatCompletionMessageParam[],\n\ttools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined,\n\tcacheControl: OpenAICompatCacheControl,\n): void {\n\taddCacheControlToSystemPrompt(messages, cacheControl);\n\taddCacheControlToLastTool(tools, cacheControl);\n\taddCacheControlToLastConversationMessage(messages, cacheControl);\n}\n\nfunction addCacheControlToSystemPrompt(\n\tmessages: ChatCompletionMessageParam[],\n\tcacheControl: OpenAICompatCacheControl,\n): void {\n\tfor (const message of messages) {\n\t\tif (message.role === \"system\" || message.role === \"developer\") {\n\t\t\taddCacheControlToInstructionMessage(message, cacheControl);\n\t\t\treturn;\n\t\t}\n\t}\n}\n\nfunction addCacheControlToLastConversationMessage(\n\tmessages: ChatCompletionMessageParam[],\n\tcacheControl: OpenAICompatCacheControl,\n): void {\n\tfor (let i = messages.length - 1; i >= 0; i--) {\n\t\tconst message = messages[i];\n\t\tif (message.role === \"user\" || message.role === \"assistant\") {\n\t\t\tif (addCacheControlToMessage(message, cacheControl)) {\n\t\t\t\treturn;\n\t\t\t}\n\t\t}\n\t}\n}\n\nfunction addCacheControlToLastTool(\n\ttools: OpenAI.Chat.Completions.ChatCompletionTool[] | undefined,\n\tcacheControl: OpenAICompatCacheControl,\n): void {\n\tif (!tools || tools.length === 0) {\n\t\treturn;\n\t}\n\n\tconst lastTool = tools[tools.length - 1] as ChatCompletionToolWithCacheControl;\n\tlastTool.cache_control = cacheControl;\n}\n\nfunction addCacheControlToInstructionMessage(\n\tmessage: ChatCompletionInstructionMessageParam,\n\tcacheControl: OpenAICompatCacheControl,\n): boolean {\n\treturn addCacheControlToTextContent(message, cacheControl);\n}\n\nfunction addCacheControlToMessage(\n\tmessage: ChatCompletionMessageParam,\n\tcacheControl: OpenAICompatCacheControl,\n): boolean {\n\tif (message.role === \"user\" || message.role === \"assistant\") {\n\t\treturn addCacheControlToTextContent(message, cacheControl);\n\t}\n\treturn false;\n}\n\nfunction addCacheControlToTextContent(\n\tmessage:\n\t\t| ChatCompletionInstructionMessageParam\n\t\t| ChatCompletionAssistantMessageParam\n\t\t| Extract<ChatCompletionMessageParam, { role: \"user\" }>,\n\tcacheControl: OpenAICompatCacheControl,\n): boolean {\n\tconst content = message.content;\n\tif (typeof content === \"string\") {\n\t\tif (content.length === 0) {\n\t\t\treturn false;\n\t\t}\n\t\tmessage.content = [\n\t\t\t{\n\t\t\t\ttype: \"text\",\n\t\t\t\ttext: content,\n\t\t\t\tcache_control: cacheControl,\n\t\t\t},\n\t\t] as ChatCompletionTextPartWithCacheControl[];\n\t\treturn true;\n\t}\n\n\tif (!Array.isArray(content)) {\n\t\treturn false;\n\t}\n\n\tfor (let i = content.length - 1; i >= 0; i--) {\n\t\tconst part = content[i];\n\t\tif (part?.type === \"text\") {\n\t\t\tconst textPart = part as ChatCompletionTextPartWithCacheControl;\n\t\t\ttextPart.cache_control = cacheControl;\n\t\t\treturn true;\n\t\t}\n\t}\n\n\treturn false;\n}\n\nexport function convertMessages(\n\tmodel: Model<\"openai-completions\">,\n\tcontext: Context,\n\tcompat: ResolvedOpenAICompletionsCompat,\n): ChatCompletionMessageParam[] {\n\tconst params: ChatCompletionMessageParam[] = [];\n\n\tconst normalizeToolCallId = (id: string): string => {\n\t\t// Handle pipe-separated IDs from OpenAI Responses API\n\t\t// Format: {call_id}|{id} where {id} can be 400+ chars with special chars (+, /, =)\n\t\t// These come from providers like github-copilot, openai-codex, opencode\n\t\t// Extract just the call_id part and normalize it\n\t\tif (id.includes(\"|\")) {\n\t\t\tconst [callId] = id.split(\"|\");\n\t\t\t// Sanitize to allowed chars and truncate to 40 chars (OpenAI limit)\n\t\t\treturn callId.replace(/[^a-zA-Z0-9_-]/g, \"_\").slice(0, 40);\n\t\t}\n\n\t\tif (model.provider === \"openai\") return id.length > 40 ? id.slice(0, 40) : id;\n\t\treturn id;\n\t};\n\n\tconst transformedMessages = transformMessages(context.messages, model, (id) => normalizeToolCallId(id));\n\n\tif (context.systemPrompt) {\n\t\tconst useDeveloperRole = model.reasoning && compat.supportsDeveloperRole;\n\t\tconst role = useDeveloperRole ? \"developer\" : \"system\";\n\t\tparams.push({ role: role, content: sanitizeSurrogates(context.systemPrompt) });\n\t}\n\n\tlet lastRole: string | null = null;\n\n\tfor (let i = 0; i < transformedMessages.length; i++) {\n\t\tconst msg = transformedMessages[i];\n\t\t// Some providers don't allow user messages directly after tool results\n\t\t// Insert a synthetic assistant message to bridge the gap\n\t\tif (compat.requiresAssistantAfterToolResult && lastRole === \"toolResult\" && msg.role === \"user\") {\n\t\t\tparams.push({\n\t\t\t\trole: \"assistant\",\n\t\t\t\tcontent: \"I have processed the tool results.\",\n\t\t\t});\n\t\t}\n\n\t\tif (msg.role === \"user\") {\n\t\t\tif (typeof msg.content === \"string\") {\n\t\t\t\tparams.push({\n\t\t\t\t\trole: \"user\",\n\t\t\t\t\tcontent: sanitizeSurrogates(msg.content),\n\t\t\t\t});\n\t\t\t} else {\n\t\t\t\tconst content: ChatCompletionContentPart[] = msg.content.map((item): ChatCompletionContentPart => {\n\t\t\t\t\tif (item.type === \"text\") {\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: sanitizeSurrogates(item.text),\n\t\t\t\t\t\t} satisfies ChatCompletionContentPartText;\n\t\t\t\t\t} else {\n\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\ttype: \"image_url\",\n\t\t\t\t\t\t\timage_url: {\n\t\t\t\t\t\t\t\turl: `data:${item.mimeType};base64,${item.data}`,\n\t\t\t\t\t\t\t},\n\t\t\t\t\t\t} satisfies ChatCompletionContentPartImage;\n\t\t\t\t\t}\n\t\t\t\t});\n\t\t\t\tif (content.length === 0) continue;\n\t\t\t\tparams.push({\n\t\t\t\t\trole: \"user\",\n\t\t\t\t\tcontent,\n\t\t\t\t});\n\t\t\t}\n\t\t} else if (msg.role === \"assistant\") {\n\t\t\t// Some providers don't accept null content, use empty string instead\n\t\t\tconst assistantMsg: ChatCompletionAssistantMessageParam = {\n\t\t\t\trole: \"assistant\",\n\t\t\t\tcontent: compat.requiresAssistantAfterToolResult ? \"\" : null,\n\t\t\t};\n\n\t\t\tconst assistantTextParts = msg.content\n\t\t\t\t.filter(isTextContentBlock)\n\t\t\t\t.filter((block) => block.text.trim().length > 0)\n\t\t\t\t.map(\n\t\t\t\t\t(block) =>\n\t\t\t\t\t\t({\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: sanitizeSurrogates(block.text),\n\t\t\t\t\t\t}) satisfies ChatCompletionContentPartText,\n\t\t\t\t);\n\t\t\tconst assistantText = assistantTextParts.map((part) => part.text).join(\"\");\n\n\t\t\tconst nonEmptyThinkingBlocks = msg.content\n\t\t\t\t.filter(isThinkingContentBlock)\n\t\t\t\t.filter((block) => block.thinking.trim().length > 0);\n\t\t\tif (nonEmptyThinkingBlocks.length > 0) {\n\t\t\t\tif (compat.requiresThinkingAsText) {\n\t\t\t\t\t// Convert thinking blocks to plain text (no tags to avoid model mimicking them)\n\t\t\t\t\tconst thinkingText = nonEmptyThinkingBlocks\n\t\t\t\t\t\t.map((block) => sanitizeSurrogates(block.thinking))\n\t\t\t\t\t\t.join(\"\\n\\n\");\n\t\t\t\t\tassistantMsg.content = [{ type: \"text\", text: thinkingText }, ...assistantTextParts];\n\t\t\t\t} else {\n\t\t\t\t\t// Always send assistant content as a plain string (OpenAI Chat Completions\n\t\t\t\t\t// API standard format). Sending as an array of {type:\"text\", text:\"...\"}\n\t\t\t\t\t// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via\n\t\t\t\t\t// NVIDIA NIM) to mirror the content-block structure literally in their\n\t\t\t\t\t// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].\n\t\t\t\t\tif (assistantText.length > 0) {\n\t\t\t\t\t\tassistantMsg.content = assistantText;\n\t\t\t\t\t}\n\n\t\t\t\t\t// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)\n\t\t\t\t\tconst signature = nonEmptyThinkingBlocks[0].thinkingSignature;\n\t\t\t\t\tif (signature && signature.length > 0) {\n\t\t\t\t\t\t(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join(\"\\n\");\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t} else if (assistantText.length > 0) {\n\t\t\t\t// Always send assistant content as a plain string (OpenAI Chat Completions\n\t\t\t\t// API standard format). Sending as an array of {type:\"text\", text:\"...\"}\n\t\t\t\t// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via\n\t\t\t\t// NVIDIA NIM) to mirror the content-block structure literally in their\n\t\t\t\t// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].\n\t\t\t\tassistantMsg.content = assistantText;\n\t\t\t}\n\n\t\t\tconst toolCalls = msg.content.filter(isToolCallBlock);\n\t\t\tif (toolCalls.length > 0) {\n\t\t\t\tassistantMsg.tool_calls = toolCalls.map((tc) => ({\n\t\t\t\t\tid: tc.id,\n\t\t\t\t\ttype: \"function\" as const,\n\t\t\t\t\tfunction: {\n\t\t\t\t\t\tname: tc.name,\n\t\t\t\t\t\targuments: JSON.stringify(tc.arguments),\n\t\t\t\t\t},\n\t\t\t\t}));\n\t\t\t\tconst reasoningDetails = toolCalls\n\t\t\t\t\t.filter((tc) => tc.thoughtSignature)\n\t\t\t\t\t.map((tc) => {\n\t\t\t\t\t\ttry {\n\t\t\t\t\t\t\treturn JSON.parse(tc.thoughtSignature!);\n\t\t\t\t\t\t} catch {\n\t\t\t\t\t\t\treturn null;\n\t\t\t\t\t\t}\n\t\t\t\t\t})\n\t\t\t\t\t.filter(Boolean);\n\t\t\t\tif (reasoningDetails.length > 0) {\n\t\t\t\t\t(assistantMsg as any).reasoning_details = reasoningDetails;\n\t\t\t\t}\n\t\t\t}\n\t\t\tif (\n\t\t\t\tcompat.requiresReasoningContentOnAssistantMessages &&\n\t\t\t\tmodel.reasoning &&\n\t\t\t\t(assistantMsg as { reasoning_content?: string }).reasoning_content === undefined\n\t\t\t) {\n\t\t\t\t(assistantMsg as { reasoning_content?: string }).reasoning_content = \"\";\n\t\t\t}\n\t\t\t// Skip assistant messages that have no content and no tool calls.\n\t\t\t// Some providers require \"either content or tool_calls, but not none\".\n\t\t\t// Other providers also don't accept empty assistant messages.\n\t\t\t// This handles aborted assistant responses that got no content.\n\t\t\tconst content = assistantMsg.content;\n\t\t\tconst hasContent =\n\t\t\t\tcontent !== null &&\n\t\t\t\tcontent !== undefined &&\n\t\t\t\t(typeof content === \"string\" ? content.length > 0 : content.length > 0);\n\t\t\tif (!hasContent && !assistantMsg.tool_calls) {\n\t\t\t\tcontinue;\n\t\t\t}\n\t\t\tparams.push(assistantMsg);\n\t\t} else if (msg.role === \"toolResult\") {\n\t\t\tconst imageBlocks: Array<{ type: \"image_url\"; image_url: { url: string } }> = [];\n\t\t\tlet j = i;\n\n\t\t\tfor (; j < transformedMessages.length && transformedMessages[j].role === \"toolResult\"; j++) {\n\t\t\t\tconst toolMsg = transformedMessages[j] as ToolResultMessage;\n\n\t\t\t\t// Extract text and image content\n\t\t\t\tconst textResult = toolMsg.content\n\t\t\t\t\t.filter(isTextContentBlock)\n\t\t\t\t\t.map((block) => block.text)\n\t\t\t\t\t.join(\"\\n\");\n\t\t\t\tconst hasImages = toolMsg.content.some((c) => c.type === \"image\");\n\n\t\t\t\t// Always send tool result with text (or placeholder if only images)\n\t\t\t\tconst hasText = textResult.length > 0;\n\t\t\t\t// Some providers require the 'name' field in tool results\n\t\t\t\tconst toolResultMsg: ChatCompletionToolMessageParam = {\n\t\t\t\t\trole: \"tool\",\n\t\t\t\t\tcontent: sanitizeSurrogates(hasText ? textResult : \"(see attached image)\"),\n\t\t\t\t\ttool_call_id: toolMsg.toolCallId,\n\t\t\t\t};\n\t\t\t\tif (compat.requiresToolResultName && toolMsg.toolName) {\n\t\t\t\t\t(toolResultMsg as any).name = toolMsg.toolName;\n\t\t\t\t}\n\t\t\t\tparams.push(toolResultMsg);\n\n\t\t\t\tif (hasImages && model.input.includes(\"image\")) {\n\t\t\t\t\tfor (const block of toolMsg.content) {\n\t\t\t\t\t\tif (isImageContentBlock(block)) {\n\t\t\t\t\t\t\timageBlocks.push({\n\t\t\t\t\t\t\t\ttype: \"image_url\",\n\t\t\t\t\t\t\t\timage_url: {\n\t\t\t\t\t\t\t\t\turl: `data:${block.mimeType};base64,${block.data}`,\n\t\t\t\t\t\t\t\t},\n\t\t\t\t\t\t\t});\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\n\t\t\ti = j - 1;\n\n\t\t\tif (imageBlocks.length > 0) {\n\t\t\t\tif (compat.requiresAssistantAfterToolResult) {\n\t\t\t\t\tparams.push({\n\t\t\t\t\t\trole: \"assistant\",\n\t\t\t\t\t\tcontent: \"I have processed the tool results.\",\n\t\t\t\t\t});\n\t\t\t\t}\n\n\t\t\t\tparams.push({\n\t\t\t\t\trole: \"user\",\n\t\t\t\t\tcontent: [\n\t\t\t\t\t\t{\n\t\t\t\t\t\t\ttype: \"text\",\n\t\t\t\t\t\t\ttext: \"Attached image(s) from tool result:\",\n\t\t\t\t\t\t},\n\t\t\t\t\t\t...imageBlocks,\n\t\t\t\t\t],\n\t\t\t\t});\n\t\t\t\tlastRole = \"user\";\n\t\t\t} else {\n\t\t\t\tlastRole = \"toolResult\";\n\t\t\t}\n\t\t\tcontinue;\n\t\t}\n\n\t\tlastRole = msg.role;\n\t}\n\n\treturn params;\n}\n\nfunction convertTools(\n\ttools: Tool[],\n\tcompat: ResolvedOpenAICompletionsCompat,\n): OpenAI.Chat.Completions.ChatCompletionTool[] {\n\treturn tools.map((tool) => ({\n\t\ttype: \"function\",\n\t\tfunction: {\n\t\t\tname: tool.name,\n\t\t\tdescription: tool.description,\n\t\t\tparameters: tool.parameters as any, // TypeBox already generates JSON Schema\n\t\t\t// Only include strict if provider supports it. Some reject unknown fields.\n\t\t\t...(compat.supportsStrictMode !== false && { strict: false }),\n\t\t},\n\t}));\n}\n\nfunction parseChunkUsage(\n\trawUsage: {\n\t\tprompt_tokens?: number;\n\t\tcompletion_tokens?: number;\n\t\tprompt_cache_hit_tokens?: number;\n\t\tprompt_tokens_details?: { cached_tokens?: number; cache_write_tokens?: number };\n\t},\n\tmodel: Model<\"openai-completions\">,\n): AssistantMessage[\"usage\"] {\n\tconst promptTokens = rawUsage.prompt_tokens || 0;\n\tconst reportedCachedTokens = rawUsage.prompt_tokens_details?.cached_tokens ?? rawUsage.prompt_cache_hit_tokens ?? 0;\n\tconst cacheWriteTokens = rawUsage.prompt_tokens_details?.cache_write_tokens || 0;\n\n\t// Normalize to pi-ai semantics:\n\t// - cacheRead: hits from cache created by previous requests only\n\t// - cacheWrite: tokens written to cache in this request\n\t// Some OpenAI-compatible providers (observed on OpenRouter) report cached_tokens\n\t// as (previous hits + current writes). In that case, remove cacheWrite from cacheRead.\n\tconst cacheReadTokens =\n\t\tcacheWriteTokens > 0 ? Math.max(0, reportedCachedTokens - cacheWriteTokens) : reportedCachedTokens;\n\n\tconst input = Math.max(0, promptTokens - cacheReadTokens - cacheWriteTokens);\n\t// OpenAI completion_tokens already includes reasoning_tokens.\n\tconst outputTokens = rawUsage.completion_tokens || 0;\n\tconst usage: AssistantMessage[\"usage\"] = {\n\t\tinput,\n\t\toutput: outputTokens,\n\t\tcacheRead: cacheReadTokens,\n\t\tcacheWrite: cacheWriteTokens,\n\t\ttotalTokens: input + outputTokens + cacheReadTokens + cacheWriteTokens,\n\t\tcost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },\n\t};\n\tcalculateCost(model, usage);\n\treturn usage;\n}\n\nfunction mapStopReason(reason: ChatCompletionChunk.Choice[\"finish_reason\"] | string): {\n\tstopReason: StopReason;\n\terrorMessage?: string;\n} {\n\tif (reason === null) return { stopReason: \"stop\" };\n\tswitch (reason) {\n\t\tcase \"stop\":\n\t\tcase \"end\":\n\t\t\treturn { stopReason: \"stop\" };\n\t\tcase \"length\":\n\t\t\treturn { stopReason: \"length\" };\n\t\tcase \"function_call\":\n\t\tcase \"tool_calls\":\n\t\t\treturn { stopReason: \"toolUse\" };\n\t\tcase \"content_filter\":\n\t\t\treturn { stopReason: \"error\", errorMessage: \"Provider finish_reason: content_filter\" };\n\t\tcase \"network_error\":\n\t\t\treturn { stopReason: \"error\", errorMessage: \"Provider finish_reason: network_error\" };\n\t\tdefault:\n\t\t\treturn {\n\t\t\t\tstopReason: \"error\",\n\t\t\t\terrorMessage: `Provider finish_reason: ${reason}`,\n\t\t\t};\n\t}\n}\n\n/**\n * Detect compatibility settings from provider and baseUrl for known providers.\n * Provider takes precedence over URL-based detection since it's explicitly configured.\n * Returns a fully resolved OpenAICompletionsCompat object with all fields set.\n */\nfunction detectCompat(model: Model<\"openai-completions\">): ResolvedOpenAICompletionsCompat {\n\tconst provider = model.provider;\n\tconst baseUrl = model.baseUrl;\n\n\tconst isZai = provider === \"zai\" || baseUrl.includes(\"api.z.ai\");\n\tconst isMoonshot = provider === \"moonshotai\" || provider === \"moonshotai-cn\" || baseUrl.includes(\"api.moonshot.\");\n\tconst isCloudflareWorkersAI = provider === \"cloudflare-workers-ai\" || baseUrl.includes(\"api.cloudflare.com\");\n\tconst isCloudflareAiGateway = provider === \"cloudflare-ai-gateway\" || baseUrl.includes(\"gateway.ai.cloudflare.com\");\n\n\tconst isNonStandard =\n\t\tprovider === \"cerebras\" ||\n\t\tbaseUrl.includes(\"cerebras.ai\") ||\n\t\tprovider === \"xai\" ||\n\t\tbaseUrl.includes(\"api.x.ai\") ||\n\t\tbaseUrl.includes(\"chutes.ai\") ||\n\t\tbaseUrl.includes(\"deepseek.com\") ||\n\t\tisZai ||\n\t\tisMoonshot ||\n\t\tprovider === \"opencode\" ||\n\t\tbaseUrl.includes(\"opencode.ai\") ||\n\t\tisCloudflareWorkersAI ||\n\t\tisCloudflareAiGateway;\n\n\tconst useMaxTokens = baseUrl.includes(\"chutes.ai\") || isMoonshot || isCloudflareAiGateway;\n\n\tconst isGrok = provider === \"xai\" || baseUrl.includes(\"api.x.ai\");\n\tconst isDeepSeek = provider === \"deepseek\" || baseUrl.includes(\"deepseek.com\");\n\tconst cacheControlFormat = provider === \"openrouter\" && model.id.startsWith(\"anthropic/\") ? \"anthropic\" : undefined;\n\n\treturn {\n\t\tsupportsStore: !isNonStandard,\n\t\tsupportsDeveloperRole: !isNonStandard,\n\t\tsupportsReasoningEffort: !isGrok && !isZai && !isMoonshot && !isCloudflareAiGateway,\n\t\tsupportsUsageInStreaming: true,\n\t\tmaxTokensField: useMaxTokens ? \"max_tokens\" : \"max_completion_tokens\",\n\t\trequiresToolResultName: false,\n\t\trequiresAssistantAfterToolResult: false,\n\t\trequiresThinkingAsText: false,\n\t\trequiresReasoningContentOnAssistantMessages: isDeepSeek,\n\t\tthinkingFormat: isDeepSeek\n\t\t\t? \"deepseek\"\n\t\t\t: isZai\n\t\t\t\t? \"zai\"\n\t\t\t\t: provider === \"openrouter\" || baseUrl.includes(\"openrouter.ai\")\n\t\t\t\t\t? \"openrouter\"\n\t\t\t\t\t: \"openai\",\n\t\topenRouterRouting: {},\n\t\tvercelGatewayRouting: {},\n\t\tzaiToolStream: false,\n\t\tsupportsStrictMode: !isMoonshot && !isCloudflareAiGateway,\n\t\tcacheControlFormat,\n\t\tsendSessionAffinityHeaders: false,\n\t\tsupportsLongCacheRetention: !(isCloudflareWorkersAI || isCloudflareAiGateway),\n\t};\n}\n\n/**\n * Get resolved compatibility settings for a model.\n * Uses explicit model.compat if provided, otherwise auto-detects from provider/URL.\n */\nfunction getCompat(model: Model<\"openai-completions\">): ResolvedOpenAICompletionsCompat {\n\tconst detected = detectCompat(model);\n\tif (!model.compat) return detected;\n\n\treturn {\n\t\tsupportsStore: model.compat.supportsStore ?? detected.supportsStore,\n\t\tsupportsDeveloperRole: model.compat.supportsDeveloperRole ?? detected.supportsDeveloperRole,\n\t\tsupportsReasoningEffort: model.compat.supportsReasoningEffort ?? detected.supportsReasoningEffort,\n\t\tsupportsUsageInStreaming: model.compat.supportsUsageInStreaming ?? detected.supportsUsageInStreaming,\n\t\tmaxTokensField: model.compat.maxTokensField ?? detected.maxTokensField,\n\t\trequiresToolResultName: model.compat.requiresToolResultName ?? detected.requiresToolResultName,\n\t\trequiresAssistantAfterToolResult:\n\t\t\tmodel.compat.requiresAssistantAfterToolResult ?? detected.requiresAssistantAfterToolResult,\n\t\trequiresThinkingAsText: model.compat.requiresThinkingAsText ?? detected.requiresThinkingAsText,\n\t\trequiresReasoningContentOnAssistantMessages:\n\t\t\tmodel.compat.requiresReasoningContentOnAssistantMessages ??\n\t\t\tdetected.requiresReasoningContentOnAssistantMessages,\n\t\tthinkingFormat: model.compat.thinkingFormat ?? detected.thinkingFormat,\n\t\topenRouterRouting: model.compat.openRouterRouting ?? {},\n\t\tvercelGatewayRouting: model.compat.vercelGatewayRouting ?? detected.vercelGatewayRouting,\n\t\tzaiToolStream: model.compat.zaiToolStream ?? detected.zaiToolStream,\n\t\tsupportsStrictMode: model.compat.supportsStrictMode ?? detected.supportsStrictMode,\n\t\tcacheControlFormat: model.compat.cacheControlFormat ?? detected.cacheControlFormat,\n\t\tsendSessionAffinityHeaders: model.compat.sendSessionAffinityHeaders ?? detected.sendSessionAffinityHeaders,\n\t\tsupportsLongCacheRetention: model.compat.supportsLongCacheRetention ?? detected.supportsLongCacheRetention,\n\t};\n}\n"]}