This commit is contained in:
Frank
2026-01-16 01:07:00 -05:00
parent de2de099b4
commit f66e6d7033
16 changed files with 228 additions and 80 deletions

View File

@@ -81,12 +81,13 @@ export async function handler(
const isTrial = await trialLimiter?.isTrial()
const rateLimiter = createRateLimiter(modelInfo.rateLimit, ip)
await rateLimiter?.check()
const stickyTracker = createStickyTracker(modelInfo.stickyProvider ?? false, sessionId)
const stickyTracker = createStickyTracker(modelInfo.stickyProvider, sessionId)
const stickyProvider = await stickyTracker?.get()
const authInfo = await authenticate(modelInfo)
const retriableRequest = async (retry: RetryOptions = { excludeProviders: [], retryCount: 0 }) => {
const providerInfo = selectProvider(
model,
zenData,
authInfo,
modelInfo,
@@ -101,7 +102,7 @@ export async function handler(
logger.metric({ provider: providerInfo.id })
const startTimestamp = Date.now()
const reqUrl = providerInfo.modifyUrl(providerInfo.api, providerInfo.model, isStream)
const reqUrl = providerInfo.modifyUrl(providerInfo.api, isStream)
const reqBody = JSON.stringify(
providerInfo.modifyBody({
...createBodyConverter(opts.format, providerInfo.format)(body),
@@ -135,7 +136,7 @@ export async function handler(
// e.g. OpenAI 404 error: Item with id 'msg_0ead8b004a3b165d0069436a6b6834819896da85b63b196a3f' not found.
res.status !== 404 &&
// i.e. Codex model providers cannot be changed mid-session
!modelInfo.stickyProvider &&
modelInfo.stickyProvider !== "strict" &&
modelInfo.fallbackProvider &&
providerInfo.id !== modelInfo.fallbackProvider
) {
@@ -194,17 +195,19 @@ export async function handler(
// Handle streaming response
const streamConverter = createStreamPartConverter(providerInfo.format, opts.format)
const usageParser = providerInfo.createUsageParser()
const binaryDecoder = providerInfo.createBinaryStreamDecoder()
const stream = new ReadableStream({
start(c) {
const reader = res.body?.getReader()
const decoder = new TextDecoder()
const encoder = new TextEncoder()
let buffer = ""
let responseLength = 0
function pump(): Promise<void> {
return (
reader?.read().then(async ({ done, value }) => {
reader?.read().then(async ({ done, value: rawValue }) => {
if (done) {
logger.metric({
response_length: responseLength,
@@ -230,6 +233,10 @@ export async function handler(
"timestamp.first_byte": now,
})
}
const value = binaryDecoder ? binaryDecoder(rawValue) : rawValue
if (!value) return
responseLength += value.length
buffer += decoder.decode(value, { stream: true })
dataDumper?.provideStream(buffer)
@@ -331,6 +338,7 @@ export async function handler(
}
function selectProvider(
reqModel: string,
zenData: ZenData,
authInfo: AuthInfo,
modelInfo: ModelInfo,
@@ -339,7 +347,7 @@ export async function handler(
retry: RetryOptions,
stickyProvider: string | undefined,
) {
const provider = (() => {
const modelProvider = (() => {
if (authInfo?.provider?.credentials) {
return modelInfo.providers.find((provider) => provider.id === modelInfo.byokProvider)
}
@@ -372,18 +380,19 @@ export async function handler(
return providers[index || 0]
})()
if (!provider) throw new ModelError("No provider available")
if (!(provider.id in zenData.providers)) throw new ModelError(`Provider ${provider.id} not supported`)
if (!modelProvider) throw new ModelError("No provider available")
if (!(modelProvider.id in zenData.providers)) throw new ModelError(`Provider ${modelProvider.id} not supported`)
return {
...provider,
...zenData.providers[provider.id],
...modelProvider,
...zenData.providers[modelProvider.id],
...(() => {
const format = zenData.providers[provider.id].format
if (format === "anthropic") return anthropicHelper
if (format === "google") return googleHelper
if (format === "openai") return openaiHelper
return oaCompatHelper
const format = zenData.providers[modelProvider.id].format
const providerModel = modelProvider.model
if (format === "anthropic") return anthropicHelper({ reqModel, providerModel })
if (format === "google") return googleHelper({ reqModel, providerModel })
if (format === "openai") return openaiHelper({ reqModel, providerModel })
return oaCompatHelper({ reqModel, providerModel })
})(),
}
}

View File

@@ -1,4 +1,6 @@
import { EventStreamCodec } from "@smithy/eventstream-codec"
import { ProviderHelper, CommonRequest, CommonResponse, CommonChunk } from "./provider"
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
type Usage = {
cache_creation?: {
@@ -14,65 +16,164 @@ type Usage = {
}
}
/**
 * Provider helper for upstreams that speak the Anthropic message format.
 *
 * Two transports are handled:
 *  - the native Anthropic API (`/messages`, `x-api-key` header), and
 *  - Anthropic models behind AWS Bedrock, detected from `providerModel` being
 *    either a Bedrock ARN (`arn:aws:bedrock:` prefix) or a `global.anthropic.`
 *    model ID. Bedrock requests go to `/model/<id>/invoke[-with-response-stream]`
 *    and streaming responses arrive as binary event-stream frames that are
 *    converted back into Anthropic-style SSE text.
 *
 * @param input.reqModel      Model name from the incoming request; only inspected for the substring "sonnet".
 * @param input.providerModel Model identifier configured for the selected provider.
 * @returns The set of request/response hooks described by `ProviderHelper`.
 */
export const anthropicHelper: ProviderHelper = ({ reqModel, providerModel }) => {
  const isBedrockModelArn = providerModel.startsWith("arn:aws:bedrock:")
  const isBedrockModelID = providerModel.startsWith("global.anthropic.")
  const isBedrock = isBedrockModelArn || isBedrockModelID
  const isSonnet = reqModel.includes("sonnet")

  return {
    format: "anthropic",

    // An ARN contains ":" and "/" and must be escaped to form a single path segment; plain model IDs are used as-is.
    modifyUrl: (providerApi: string, isStream?: boolean) =>
      isBedrock
        ? `${providerApi}/model/${isBedrockModelArn ? encodeURIComponent(providerModel) : providerModel}/${isStream ? "invoke-with-response-stream" : "invoke"}`
        : providerApi + "/messages",

    modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
      if (isBedrock) {
        headers.set("Authorization", `Bearer ${apiKey}`)
      } else {
        headers.set("x-api-key", apiKey)
        // Respect a caller-supplied version, otherwise pin the default.
        headers.set("anthropic-version", headers.get("anthropic-version") ?? "2023-06-01")
        if (body.model.startsWith("claude-sonnet-")) {
          headers.set("anthropic-beta", "context-1m-2025-08-07")
        }
      }
    },

    // For Bedrock, `model` and `stream` are set to undefined so that they are omitted once the
    // body is serialized with JSON.stringify; both are already conveyed by the request URL.
    // NOTE(review): Bedrock's documentation describes `anthropic_beta` as a list of strings —
    // confirm that a bare string is accepted here.
    modifyBody: (body: Record<string, any>) => ({
      ...body,
      ...(isBedrock
        ? {
            anthropic_version: "bedrock-2023-05-31",
            anthropic_beta: isSonnet ? "context-1m-2025-08-07" : undefined,
            model: undefined,
            stream: undefined,
          }
        : {
            service_tier: "standard_only",
          }),
    }),

    /**
     * Builds a stateful decoder that turns Bedrock's binary event-stream into
     * Anthropic SSE bytes. Returns `undefined` for non-Bedrock providers.
     *
     * The returned function accepts one network chunk and returns the SSE bytes
     * for every complete frame now available (possibly several), or `undefined`
     * when no complete event frame could be produced yet. Partial frames are kept
     * in an internal buffer until the remaining bytes arrive.
     */
    createBinaryStreamDecoder: () => {
      if (!isBedrock) return undefined

      // Smallest well-formed frame: 8-byte prelude (total length + headers length),
      // 4-byte prelude CRC and 4-byte message CRC.
      const MIN_FRAME_LENGTH = 16
      const decoder = new TextDecoder()
      const encoder = new TextEncoder()
      const codec = new EventStreamCodec(toUtf8, fromUtf8)
      let buffer = new Uint8Array(0)

      // Picks the SSE event name from the payload's own `type` field so Bedrock events are
      // labelled like native Anthropic ones; falls back to "message_start" when it is absent.
      const sseEventName = (json: string): string => {
        try {
          const parsed: { type?: unknown } | null = JSON.parse(json)
          const type = parsed?.type
          if (typeof type === "string" && type.length > 0) return type
        } catch (e) {
          // Payload is forwarded verbatim below; an unparsable body only affects the label.
        }
        return "message_start"
      }

      /*
       * A Bedrock event frame body is JSON of the form
       *   { "bytes": "<base64 of the Anthropic event JSON>", "p": "..." }
       * and the base64 payload decodes to a regular Anthropic event, e.g.
       *   {"type":"message_start","message":{...,"usage":{"input_tokens":4,...}}}
       * which on the native API would have been delivered as
       *   event: message_start
       *   data: {"type":"message_start",...}
       */
      const frameToSse = (frame: Uint8Array): Uint8Array | undefined => {
        try {
          const decoded = codec.decode(frame)
          if (decoded.headers[":message-type"]?.value !== "event") return undefined
          const envelope: { bytes?: unknown } | null = JSON.parse(decoder.decode(decoded.body))
          if (typeof envelope?.bytes !== "string") return undefined
          // atob yields one char per byte; map back to bytes and decode as UTF-8 so that
          // non-ASCII text in the payload is not double-encoded.
          const payload = Uint8Array.from(atob(envelope.bytes), (ch) => ch.charCodeAt(0))
          const json = decoder.decode(payload)
          return encoder.encode(["event: " + sseEventName(json), "\n", "data: " + json, "\n\n"].join(""))
        } catch (e) {
          console.log(e)
          return undefined
        }
      }

      return (value: Uint8Array) => {
        const merged = new Uint8Array(buffer.length + value.length)
        merged.set(buffer)
        merged.set(value, buffer.length)
        buffer = merged

        const events: Uint8Array[] = []
        // A single network chunk may carry several frames; drain all complete ones.
        while (buffer.length >= 4) {
          // The first 4 bytes are the total frame length (big-endian).
          const totalLength = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength).getUint32(0, false)
          if (totalLength < MIN_FRAME_LENGTH) {
            // Framing is lost and cannot be resynchronised; drop the buffered bytes rather
            // than spinning on (or being wedged by) an impossible length.
            console.log(new Error(`Invalid event stream frame length: ${totalLength}`))
            buffer = new Uint8Array(0)
            break
          }
          // Frame is not complete yet; wait for more chunks.
          if (buffer.length < totalLength) break

          const frame = buffer.subarray(0, totalLength)
          // Consume the frame before decoding so a bad frame cannot block later ones.
          buffer = buffer.slice(totalLength)
          const sse = frameToSse(frame)
          if (sse) events.push(sse)
        }

        if (events.length === 0) return undefined
        if (events.length === 1) return events[0]
        const output = new Uint8Array(events.reduce((sum, part) => sum + part.length, 0))
        let offset = 0
        for (const part of events) {
          output.set(part, offset)
          offset += part.length
        }
        return output
      }
    },

    streamSeparator: "\n\n",

    /**
     * Creates an accumulator for token usage reported across SSE events.
     * `parse` takes one SSE event (an `event:` line followed by a `data:` line) and merges any
     * `usage` / `message.usage` object into the running total; `retrieve` returns the merged
     * usage, which is undefined until at least one event carried usage.
     */
    createUsageParser: () => {
      let usage: Usage
      return {
        parse: (chunk: string) => {
          const data = chunk.split("\n")[1]
          // Single-line chunks have no data line at all.
          if (!data?.startsWith("data: ")) return
          let json
          try {
            json = JSON.parse(data.slice(6))
          } catch (e) {
            return
          }
          const usageUpdate = json.usage ?? json.message?.usage
          if (!usageUpdate) return
          // Later events only carry some fields, so merge instead of overwrite.
          usage = {
            ...usage,
            ...usageUpdate,
            cache_creation: {
              ...usage?.cache_creation,
              ...usageUpdate.cache_creation,
            },
            server_tool_use: {
              ...usage?.server_tool_use,
              ...usageUpdate.server_tool_use,
            },
          }
        },
        retrieve: () => usage,
      }
    },

    // Maps Anthropic usage field names onto the provider-neutral usage shape.
    normalizeUsage: (usage: Usage) => ({
      inputTokens: usage.input_tokens ?? 0,
      outputTokens: usage.output_tokens ?? 0,
      reasoningTokens: undefined,
      cacheReadTokens: usage.cache_read_input_tokens ?? undefined,
      cacheWrite5mTokens: usage.cache_creation?.ephemeral_5m_input_tokens ?? undefined,
      cacheWrite1hTokens: usage.cache_creation?.ephemeral_1h_input_tokens ?? undefined,
    }),
  }
}
export function fromAnthropicRequest(body: any): CommonRequest {
if (!body || typeof body !== "object") return body

View File

@@ -26,16 +26,17 @@ type Usage = {
thoughtsTokenCount?: number
}
export const googleHelper = {
export const googleHelper: ProviderHelper = ({ providerModel }) => ({
format: "google",
modifyUrl: (providerApi: string, model?: string, isStream?: boolean) =>
`${providerApi}/models/${model}:${isStream ? "streamGenerateContent?alt=sse" : "generateContent"}`,
modifyUrl: (providerApi: string, isStream?: boolean) =>
`${providerApi}/models/${providerModel}:${isStream ? "streamGenerateContent?alt=sse" : "generateContent"}`,
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
headers.set("x-goog-api-key", apiKey)
},
modifyBody: (body: Record<string, any>) => {
return body
},
createBinaryStreamDecoder: () => undefined,
streamSeparator: "\r\n\r\n",
createUsageParser: () => {
let usage: Usage
@@ -71,4 +72,4 @@ export const googleHelper = {
cacheWrite1hTokens: undefined,
}
},
} satisfies ProviderHelper
})

View File

@@ -21,7 +21,7 @@ type Usage = {
}
}
export const oaCompatHelper = {
export const oaCompatHelper: ProviderHelper = () => ({
format: "oa-compat",
modifyUrl: (providerApi: string) => providerApi + "/chat/completions",
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
@@ -33,6 +33,7 @@ export const oaCompatHelper = {
...(body.stream ? { stream_options: { include_usage: true } } : {}),
}
},
createBinaryStreamDecoder: () => undefined,
streamSeparator: "\n\n",
createUsageParser: () => {
let usage: Usage
@@ -68,7 +69,7 @@ export const oaCompatHelper = {
cacheWrite1hTokens: undefined,
}
},
} satisfies ProviderHelper
})
export function fromOaCompatibleRequest(body: any): CommonRequest {
if (!body || typeof body !== "object") return body

View File

@@ -12,7 +12,7 @@ type Usage = {
total_tokens?: number
}
export const openaiHelper = {
export const openaiHelper: ProviderHelper = () => ({
format: "openai",
modifyUrl: (providerApi: string) => providerApi + "/responses",
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
@@ -21,6 +21,7 @@ export const openaiHelper = {
modifyBody: (body: Record<string, any>) => {
return body
},
createBinaryStreamDecoder: () => undefined,
streamSeparator: "\n\n",
createUsageParser: () => {
let usage: Usage
@@ -58,7 +59,7 @@ export const openaiHelper = {
cacheWrite1hTokens: undefined,
}
},
} satisfies ProviderHelper
})
export function fromOpenaiRequest(body: any): CommonRequest {
if (!body || typeof body !== "object") return body

View File

@@ -33,11 +33,12 @@ export type UsageInfo = {
cacheWrite1hTokens?: number
}
export type ProviderHelper = {
export type ProviderHelper = (input: { reqModel: string; providerModel: string }) => {
format: ZenData.Format
modifyUrl: (providerApi: string, model?: string, isStream?: boolean) => string
modifyUrl: (providerApi: string, isStream?: boolean) => string
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => void
modifyBody: (body: Record<string, any>) => Record<string, any>
createBinaryStreamDecoder: () => ((chunk: Uint8Array) => Uint8Array | undefined) | undefined
streamSeparator: string
createUsageParser: () => {
parse: (chunk: string) => void

View File

@@ -1,6 +1,6 @@
import { Resource } from "@opencode-ai/console-resource"
export function createStickyTracker(stickyProvider: boolean, session: string) {
export function createStickyTracker(stickyProvider: "strict" | "prefer" | undefined, session: string) {
if (!stickyProvider) return
if (!session) return
const key = `sticky:${session}`