wip: zen
This commit is contained in:
@@ -81,12 +81,13 @@ export async function handler(
|
||||
const isTrial = await trialLimiter?.isTrial()
|
||||
const rateLimiter = createRateLimiter(modelInfo.rateLimit, ip)
|
||||
await rateLimiter?.check()
|
||||
const stickyTracker = createStickyTracker(modelInfo.stickyProvider ?? false, sessionId)
|
||||
const stickyTracker = createStickyTracker(modelInfo.stickyProvider, sessionId)
|
||||
const stickyProvider = await stickyTracker?.get()
|
||||
const authInfo = await authenticate(modelInfo)
|
||||
|
||||
const retriableRequest = async (retry: RetryOptions = { excludeProviders: [], retryCount: 0 }) => {
|
||||
const providerInfo = selectProvider(
|
||||
model,
|
||||
zenData,
|
||||
authInfo,
|
||||
modelInfo,
|
||||
@@ -101,7 +102,7 @@ export async function handler(
|
||||
logger.metric({ provider: providerInfo.id })
|
||||
|
||||
const startTimestamp = Date.now()
|
||||
const reqUrl = providerInfo.modifyUrl(providerInfo.api, providerInfo.model, isStream)
|
||||
const reqUrl = providerInfo.modifyUrl(providerInfo.api, isStream)
|
||||
const reqBody = JSON.stringify(
|
||||
providerInfo.modifyBody({
|
||||
...createBodyConverter(opts.format, providerInfo.format)(body),
|
||||
@@ -135,7 +136,7 @@ export async function handler(
|
||||
// ie. openai 404 error: Item with id 'msg_0ead8b004a3b165d0069436a6b6834819896da85b63b196a3f' not found.
|
||||
res.status !== 404 &&
|
||||
// ie. cannot change codex model providers mid-session
|
||||
!modelInfo.stickyProvider &&
|
||||
modelInfo.stickyProvider !== "strict" &&
|
||||
modelInfo.fallbackProvider &&
|
||||
providerInfo.id !== modelInfo.fallbackProvider
|
||||
) {
|
||||
@@ -194,17 +195,19 @@ export async function handler(
|
||||
// Handle streaming response
|
||||
const streamConverter = createStreamPartConverter(providerInfo.format, opts.format)
|
||||
const usageParser = providerInfo.createUsageParser()
|
||||
const binaryDecoder = providerInfo.createBinaryStreamDecoder()
|
||||
const stream = new ReadableStream({
|
||||
start(c) {
|
||||
const reader = res.body?.getReader()
|
||||
const decoder = new TextDecoder()
|
||||
const encoder = new TextEncoder()
|
||||
|
||||
let buffer = ""
|
||||
let responseLength = 0
|
||||
|
||||
function pump(): Promise<void> {
|
||||
return (
|
||||
reader?.read().then(async ({ done, value }) => {
|
||||
reader?.read().then(async ({ done, value: rawValue }) => {
|
||||
if (done) {
|
||||
logger.metric({
|
||||
response_length: responseLength,
|
||||
@@ -230,6 +233,10 @@ export async function handler(
|
||||
"timestamp.first_byte": now,
|
||||
})
|
||||
}
|
||||
|
||||
const value = binaryDecoder ? binaryDecoder(rawValue) : rawValue
|
||||
if (!value) return
|
||||
|
||||
responseLength += value.length
|
||||
buffer += decoder.decode(value, { stream: true })
|
||||
dataDumper?.provideStream(buffer)
|
||||
@@ -331,6 +338,7 @@ export async function handler(
|
||||
}
|
||||
|
||||
function selectProvider(
|
||||
reqModel: string,
|
||||
zenData: ZenData,
|
||||
authInfo: AuthInfo,
|
||||
modelInfo: ModelInfo,
|
||||
@@ -339,7 +347,7 @@ export async function handler(
|
||||
retry: RetryOptions,
|
||||
stickyProvider: string | undefined,
|
||||
) {
|
||||
const provider = (() => {
|
||||
const modelProvider = (() => {
|
||||
if (authInfo?.provider?.credentials) {
|
||||
return modelInfo.providers.find((provider) => provider.id === modelInfo.byokProvider)
|
||||
}
|
||||
@@ -372,18 +380,19 @@ export async function handler(
|
||||
return providers[index || 0]
|
||||
})()
|
||||
|
||||
if (!provider) throw new ModelError("No provider available")
|
||||
if (!(provider.id in zenData.providers)) throw new ModelError(`Provider ${provider.id} not supported`)
|
||||
if (!modelProvider) throw new ModelError("No provider available")
|
||||
if (!(modelProvider.id in zenData.providers)) throw new ModelError(`Provider ${modelProvider.id} not supported`)
|
||||
|
||||
return {
|
||||
...provider,
|
||||
...zenData.providers[provider.id],
|
||||
...modelProvider,
|
||||
...zenData.providers[modelProvider.id],
|
||||
...(() => {
|
||||
const format = zenData.providers[provider.id].format
|
||||
if (format === "anthropic") return anthropicHelper
|
||||
if (format === "google") return googleHelper
|
||||
if (format === "openai") return openaiHelper
|
||||
return oaCompatHelper
|
||||
const format = zenData.providers[modelProvider.id].format
|
||||
const providerModel = modelProvider.model
|
||||
if (format === "anthropic") return anthropicHelper({ reqModel, providerModel })
|
||||
if (format === "google") return googleHelper({ reqModel, providerModel })
|
||||
if (format === "openai") return openaiHelper({ reqModel, providerModel })
|
||||
return oaCompatHelper({ reqModel, providerModel })
|
||||
})(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import { EventStreamCodec } from "@smithy/eventstream-codec"
|
||||
import { ProviderHelper, CommonRequest, CommonResponse, CommonChunk } from "./provider"
|
||||
import { fromUtf8, toUtf8 } from "@smithy/util-utf8"
|
||||
|
||||
type Usage = {
|
||||
cache_creation?: {
|
||||
@@ -14,65 +16,164 @@ type Usage = {
|
||||
}
|
||||
}
|
||||
|
||||
export const anthropicHelper = {
|
||||
format: "anthropic",
|
||||
modifyUrl: (providerApi: string) => providerApi + "/messages",
|
||||
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
|
||||
headers.set("x-api-key", apiKey)
|
||||
headers.set("anthropic-version", headers.get("anthropic-version") ?? "2023-06-01")
|
||||
if (body.model.startsWith("claude-sonnet-")) {
|
||||
headers.set("anthropic-beta", "context-1m-2025-08-07")
|
||||
}
|
||||
},
|
||||
modifyBody: (body: Record<string, any>) => {
|
||||
return {
|
||||
export const anthropicHelper: ProviderHelper = ({ reqModel, providerModel }) => {
|
||||
const isBedrockModelArn = providerModel.startsWith("arn:aws:bedrock:")
|
||||
const isBedrockModelID = providerModel.startsWith("global.anthropic.")
|
||||
const isBedrock = isBedrockModelArn || isBedrockModelID
|
||||
const isSonnet = reqModel.includes("sonnet")
|
||||
return {
|
||||
format: "anthropic",
|
||||
modifyUrl: (providerApi: string, isStream?: boolean) =>
|
||||
isBedrock
|
||||
? `${providerApi}/model/${isBedrockModelArn ? encodeURIComponent(providerModel) : providerModel}/${isStream ? "invoke-with-response-stream" : "invoke"}`
|
||||
: providerApi + "/messages",
|
||||
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
|
||||
if (isBedrock) {
|
||||
headers.set("Authorization", `Bearer ${apiKey}`)
|
||||
} else {
|
||||
headers.set("x-api-key", apiKey)
|
||||
headers.set("anthropic-version", headers.get("anthropic-version") ?? "2023-06-01")
|
||||
if (body.model.startsWith("claude-sonnet-")) {
|
||||
headers.set("anthropic-beta", "context-1m-2025-08-07")
|
||||
}
|
||||
}
|
||||
},
|
||||
modifyBody: (body: Record<string, any>) => ({
|
||||
...body,
|
||||
service_tier: "standard_only",
|
||||
}
|
||||
},
|
||||
streamSeparator: "\n\n",
|
||||
createUsageParser: () => {
|
||||
let usage: Usage
|
||||
...(isBedrock
|
||||
? {
|
||||
anthropic_version: "bedrock-2023-05-31",
|
||||
anthropic_beta: isSonnet ? "context-1m-2025-08-07" : undefined,
|
||||
model: undefined,
|
||||
stream: undefined,
|
||||
}
|
||||
: {
|
||||
service_tier: "standard_only",
|
||||
}),
|
||||
}),
|
||||
createBinaryStreamDecoder: () => {
|
||||
if (!isBedrock) return undefined
|
||||
|
||||
return {
|
||||
parse: (chunk: string) => {
|
||||
const data = chunk.split("\n")[1]
|
||||
if (!data.startsWith("data: ")) return
|
||||
const decoder = new TextDecoder()
|
||||
const encoder = new TextEncoder()
|
||||
const codec = new EventStreamCodec(toUtf8, fromUtf8)
|
||||
let buffer = new Uint8Array(0)
|
||||
return (value: Uint8Array) => {
|
||||
const newBuffer = new Uint8Array(buffer.length + value.length)
|
||||
newBuffer.set(buffer)
|
||||
newBuffer.set(value, buffer.length)
|
||||
buffer = newBuffer
|
||||
|
||||
if (buffer.length < 4) return
|
||||
// The first 4 bytes are the total length (big-endian).
|
||||
const totalLength = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength).getUint32(0, false)
|
||||
|
||||
// If we don't have the full message yet, wait for more chunks.
|
||||
if (buffer.length < totalLength) return
|
||||
|
||||
let json
|
||||
try {
|
||||
json = JSON.parse(data.slice(6))
|
||||
} catch (e) {
|
||||
return
|
||||
}
|
||||
// Decode exactly the sub-slice for this event.
|
||||
const subView = buffer.subarray(0, totalLength)
|
||||
const decoded = codec.decode(subView)
|
||||
|
||||
const usageUpdate = json.usage ?? json.message?.usage
|
||||
if (!usageUpdate) return
|
||||
usage = {
|
||||
...usage,
|
||||
...usageUpdate,
|
||||
cache_creation: {
|
||||
...usage?.cache_creation,
|
||||
...usageUpdate.cache_creation,
|
||||
},
|
||||
server_tool_use: {
|
||||
...usage?.server_tool_use,
|
||||
...usageUpdate.server_tool_use,
|
||||
},
|
||||
// Slice the used bytes out of the buffer, removing this message.
|
||||
buffer = buffer.slice(totalLength)
|
||||
|
||||
// Process message
|
||||
/* Example of Bedrock data
|
||||
```
|
||||
{
|
||||
bytes: 'eyJ0eXBlIjoibWVzc2FnZV9zdGFydCIsIm1lc3NhZ2UiOnsibW9kZWwiOiJjbGF1ZGUtb3B1cy00LTUtMjAyNTExMDEiLCJpZCI6Im1zZ19iZHJrXzAxMjVGdHRGb2lkNGlwWmZ4SzZMbktxeCIsInR5cGUiOiJtZXNzYWdlIiwicm9sZSI6ImFzc2lzdGFudCIsImNvbnRlbnQiOltdLCJzdG9wX3JlYXNvbiI6bnVsbCwic3RvcF9zZXF1ZW5jZSI6bnVsbCwidXNhZ2UiOnsiaW5wdXRfdG9rZW5zIjo0LCJjYWNoZV9jcmVhdGlvbl9pbnB1dF90b2tlbnMiOjEsImNhY2hlX3JlYWRfaW5wdXRfdG9rZW5zIjoxMTk2MywiY2FjaGVfY3JlYXRpb24iOnsiZXBoZW1lcmFsXzVtX2lucHV0X3Rva2VucyI6MSwiZXBoZW1lcmFsXzFoX2lucHV0X3Rva2VucyI6MH0sIm91dHB1dF90b2tlbnMiOjF9fX0=',
|
||||
p: '...'
|
||||
}
|
||||
},
|
||||
retrieve: () => usage,
|
||||
}
|
||||
},
|
||||
normalizeUsage: (usage: Usage) => ({
|
||||
inputTokens: usage.input_tokens ?? 0,
|
||||
outputTokens: usage.output_tokens ?? 0,
|
||||
reasoningTokens: undefined,
|
||||
cacheReadTokens: usage.cache_read_input_tokens ?? undefined,
|
||||
cacheWrite5mTokens: usage.cache_creation?.ephemeral_5m_input_tokens ?? undefined,
|
||||
cacheWrite1hTokens: usage.cache_creation?.ephemeral_1h_input_tokens ?? undefined,
|
||||
}),
|
||||
} satisfies ProviderHelper
|
||||
```
|
||||
|
||||
Decoded bytes
|
||||
```
|
||||
{
|
||||
type: 'message_start',
|
||||
message: {
|
||||
model: 'claude-opus-4-5-20251101',
|
||||
id: 'msg_bdrk_0125FttFoid4ipZfxK6LnKqx',
|
||||
type: 'message',
|
||||
role: 'assistant',
|
||||
content: [],
|
||||
stop_reason: null,
|
||||
stop_sequence: null,
|
||||
usage: {
|
||||
input_tokens: 4,
|
||||
cache_creation_input_tokens: 1,
|
||||
cache_read_input_tokens: 11963,
|
||||
cache_creation: [Object],
|
||||
output_tokens: 1
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
*/
|
||||
|
||||
/* Example of Anthropic data
|
||||
```
|
||||
event: message_delta
|
||||
data: {"type":"message_start","message":{"model":"claude-opus-4-5-20251101","id":"msg_01ETvwVWSKULxzPdkQ1xAnk2","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":11543,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":11543,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}}}
|
||||
```
|
||||
*/
|
||||
if (decoded.headers[":message-type"]?.value !== "event") return
|
||||
const data = decoder.decode(decoded.body, { stream: true })
|
||||
|
||||
const parsedDataResult = JSON.parse(data)
|
||||
delete parsedDataResult.p
|
||||
const utf8 = atob(parsedDataResult.bytes)
|
||||
return encoder.encode(["event: message_start", "\n", "data: " + utf8, "\n\n"].join(""))
|
||||
} catch (e) {
|
||||
console.log(e)
|
||||
}
|
||||
}
|
||||
},
|
||||
streamSeparator: "\n\n",
|
||||
createUsageParser: () => {
|
||||
let usage: Usage
|
||||
|
||||
return {
|
||||
parse: (chunk: string) => {
|
||||
const data = chunk.split("\n")[1]
|
||||
if (!data.startsWith("data: ")) return
|
||||
|
||||
let json
|
||||
try {
|
||||
json = JSON.parse(data.slice(6))
|
||||
} catch (e) {
|
||||
return
|
||||
}
|
||||
|
||||
const usageUpdate = json.usage ?? json.message?.usage
|
||||
if (!usageUpdate) return
|
||||
usage = {
|
||||
...usage,
|
||||
...usageUpdate,
|
||||
cache_creation: {
|
||||
...usage?.cache_creation,
|
||||
...usageUpdate.cache_creation,
|
||||
},
|
||||
server_tool_use: {
|
||||
...usage?.server_tool_use,
|
||||
...usageUpdate.server_tool_use,
|
||||
},
|
||||
}
|
||||
},
|
||||
retrieve: () => usage,
|
||||
}
|
||||
},
|
||||
normalizeUsage: (usage: Usage) => ({
|
||||
inputTokens: usage.input_tokens ?? 0,
|
||||
outputTokens: usage.output_tokens ?? 0,
|
||||
reasoningTokens: undefined,
|
||||
cacheReadTokens: usage.cache_read_input_tokens ?? undefined,
|
||||
cacheWrite5mTokens: usage.cache_creation?.ephemeral_5m_input_tokens ?? undefined,
|
||||
cacheWrite1hTokens: usage.cache_creation?.ephemeral_1h_input_tokens ?? undefined,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
export function fromAnthropicRequest(body: any): CommonRequest {
|
||||
if (!body || typeof body !== "object") return body
|
||||
|
||||
@@ -26,16 +26,17 @@ type Usage = {
|
||||
thoughtsTokenCount?: number
|
||||
}
|
||||
|
||||
export const googleHelper = {
|
||||
export const googleHelper: ProviderHelper = ({ providerModel }) => ({
|
||||
format: "google",
|
||||
modifyUrl: (providerApi: string, model?: string, isStream?: boolean) =>
|
||||
`${providerApi}/models/${model}:${isStream ? "streamGenerateContent?alt=sse" : "generateContent"}`,
|
||||
modifyUrl: (providerApi: string, isStream?: boolean) =>
|
||||
`${providerApi}/models/${providerModel}:${isStream ? "streamGenerateContent?alt=sse" : "generateContent"}`,
|
||||
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
|
||||
headers.set("x-goog-api-key", apiKey)
|
||||
},
|
||||
modifyBody: (body: Record<string, any>) => {
|
||||
return body
|
||||
},
|
||||
createBinaryStreamDecoder: () => undefined,
|
||||
streamSeparator: "\r\n\r\n",
|
||||
createUsageParser: () => {
|
||||
let usage: Usage
|
||||
@@ -71,4 +72,4 @@ export const googleHelper = {
|
||||
cacheWrite1hTokens: undefined,
|
||||
}
|
||||
},
|
||||
} satisfies ProviderHelper
|
||||
})
|
||||
|
||||
@@ -21,7 +21,7 @@ type Usage = {
|
||||
}
|
||||
}
|
||||
|
||||
export const oaCompatHelper = {
|
||||
export const oaCompatHelper: ProviderHelper = () => ({
|
||||
format: "oa-compat",
|
||||
modifyUrl: (providerApi: string) => providerApi + "/chat/completions",
|
||||
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
|
||||
@@ -33,6 +33,7 @@ export const oaCompatHelper = {
|
||||
...(body.stream ? { stream_options: { include_usage: true } } : {}),
|
||||
}
|
||||
},
|
||||
createBinaryStreamDecoder: () => undefined,
|
||||
streamSeparator: "\n\n",
|
||||
createUsageParser: () => {
|
||||
let usage: Usage
|
||||
@@ -68,7 +69,7 @@ export const oaCompatHelper = {
|
||||
cacheWrite1hTokens: undefined,
|
||||
}
|
||||
},
|
||||
} satisfies ProviderHelper
|
||||
})
|
||||
|
||||
export function fromOaCompatibleRequest(body: any): CommonRequest {
|
||||
if (!body || typeof body !== "object") return body
|
||||
|
||||
@@ -12,7 +12,7 @@ type Usage = {
|
||||
total_tokens?: number
|
||||
}
|
||||
|
||||
export const openaiHelper = {
|
||||
export const openaiHelper: ProviderHelper = () => ({
|
||||
format: "openai",
|
||||
modifyUrl: (providerApi: string) => providerApi + "/responses",
|
||||
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => {
|
||||
@@ -21,6 +21,7 @@ export const openaiHelper = {
|
||||
modifyBody: (body: Record<string, any>) => {
|
||||
return body
|
||||
},
|
||||
createBinaryStreamDecoder: () => undefined,
|
||||
streamSeparator: "\n\n",
|
||||
createUsageParser: () => {
|
||||
let usage: Usage
|
||||
@@ -58,7 +59,7 @@ export const openaiHelper = {
|
||||
cacheWrite1hTokens: undefined,
|
||||
}
|
||||
},
|
||||
} satisfies ProviderHelper
|
||||
})
|
||||
|
||||
export function fromOpenaiRequest(body: any): CommonRequest {
|
||||
if (!body || typeof body !== "object") return body
|
||||
|
||||
@@ -33,11 +33,12 @@ export type UsageInfo = {
|
||||
cacheWrite1hTokens?: number
|
||||
}
|
||||
|
||||
export type ProviderHelper = {
|
||||
export type ProviderHelper = (input: { reqModel: string; providerModel: string }) => {
|
||||
format: ZenData.Format
|
||||
modifyUrl: (providerApi: string, model?: string, isStream?: boolean) => string
|
||||
modifyUrl: (providerApi: string, isStream?: boolean) => string
|
||||
modifyHeaders: (headers: Headers, body: Record<string, any>, apiKey: string) => void
|
||||
modifyBody: (body: Record<string, any>) => Record<string, any>
|
||||
createBinaryStreamDecoder: () => ((chunk: Uint8Array) => Uint8Array | undefined) | undefined
|
||||
streamSeparator: string
|
||||
createUsageParser: () => {
|
||||
parse: (chunk: string) => void
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { Resource } from "@opencode-ai/console-resource"
|
||||
|
||||
export function createStickyTracker(stickyProvider: boolean, session: string) {
|
||||
export function createStickyTracker(stickyProvider: "strict" | "prefer" | undefined, session: string) {
|
||||
if (!stickyProvider) return
|
||||
if (!session) return
|
||||
const key = `sticky:${session}`
|
||||
|
||||
Reference in New Issue
Block a user