|
|
|
|
@@ -38,6 +38,7 @@ type RetryOptions = {
|
|
|
|
|
excludeProviders: string[]
|
|
|
|
|
retryCount: number
|
|
|
|
|
}
|
|
|
|
|
type BillingSource = "anonymous" | "free" | "byok" | "subscription" | "balance"
|
|
|
|
|
|
|
|
|
|
export async function handler(
|
|
|
|
|
input: APIEvent,
|
|
|
|
|
@@ -51,6 +52,7 @@ export async function handler(
|
|
|
|
|
type AuthInfo = Awaited<ReturnType<typeof authenticate>>
|
|
|
|
|
type ModelInfo = Awaited<ReturnType<typeof validateModel>>
|
|
|
|
|
type ProviderInfo = Awaited<ReturnType<typeof selectProvider>>
|
|
|
|
|
type CostInfo = ReturnType<typeof calculateCost>
|
|
|
|
|
|
|
|
|
|
const MAX_FAILOVER_RETRIES = 3
|
|
|
|
|
const MAX_429_RETRIES = 3
|
|
|
|
|
@@ -139,21 +141,22 @@ export async function handler(
|
|
|
|
|
"llm.error.code": res.status,
|
|
|
|
|
"llm.error.message": res.statusText,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Try another provider => stop retrying if using fallback provider
|
|
|
|
|
if (
|
|
|
|
|
// ie. openai 404 error: Item with id 'msg_0ead8b004a3b165d0069436a6b6834819896da85b63b196a3f' not found.
|
|
|
|
|
res.status !== 404 &&
|
|
|
|
|
// ie. cannot change codex model providers mid-session
|
|
|
|
|
modelInfo.stickyProvider !== "strict" &&
|
|
|
|
|
modelInfo.fallbackProvider &&
|
|
|
|
|
providerInfo.id !== modelInfo.fallbackProvider
|
|
|
|
|
) {
|
|
|
|
|
return retriableRequest({
|
|
|
|
|
excludeProviders: [...retry.excludeProviders, providerInfo.id],
|
|
|
|
|
retryCount: retry.retryCount + 1,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
// Try another provider => stop retrying if using fallback provider
|
|
|
|
|
if (
|
|
|
|
|
res.status !== 200 &&
|
|
|
|
|
// ie. openai 404 error: Item with id 'msg_0ead8b004a3b165d0069436a6b6834819896da85b63b196a3f' not found.
|
|
|
|
|
res.status !== 404 &&
|
|
|
|
|
// ie. cannot change codex model providers mid-session
|
|
|
|
|
modelInfo.stickyProvider !== "strict" &&
|
|
|
|
|
modelInfo.fallbackProvider &&
|
|
|
|
|
providerInfo.id !== modelInfo.fallbackProvider
|
|
|
|
|
) {
|
|
|
|
|
return retriableRequest({
|
|
|
|
|
excludeProviders: [...retry.excludeProviders, providerInfo.id],
|
|
|
|
|
retryCount: retry.retryCount + 1,
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return { providerInfo, reqBody, res, startTimestamp }
|
|
|
|
|
@@ -183,18 +186,25 @@ export async function handler(
|
|
|
|
|
|
|
|
|
|
// Handle non-streaming response
|
|
|
|
|
if (!isStream) {
|
|
|
|
|
const responseConverter = createResponseConverter(providerInfo.format, opts.format)
|
|
|
|
|
const json = await res.json()
|
|
|
|
|
const body = JSON.stringify(responseConverter(json))
|
|
|
|
|
const usageInfo = providerInfo.normalizeUsage(json.usage)
|
|
|
|
|
const costInfo = calculateCost(modelInfo, usageInfo)
|
|
|
|
|
await trialLimiter?.track(usageInfo)
|
|
|
|
|
await rateLimiter?.track()
|
|
|
|
|
await trackUsage(billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo)
|
|
|
|
|
await reload(billingSource, authInfo, costInfo)
|
|
|
|
|
|
|
|
|
|
const responseConverter = createResponseConverter(providerInfo.format, opts.format)
|
|
|
|
|
const body = JSON.stringify(
|
|
|
|
|
responseConverter({
|
|
|
|
|
...json,
|
|
|
|
|
cost: calculateOccuredCost(billingSource, costInfo),
|
|
|
|
|
}),
|
|
|
|
|
)
|
|
|
|
|
logger.metric({ response_length: body.length })
|
|
|
|
|
logger.debug("RESPONSE: " + body)
|
|
|
|
|
dataDumper?.provideResponse(body)
|
|
|
|
|
dataDumper?.flush()
|
|
|
|
|
const tokensInfo = providerInfo.normalizeUsage(json.usage)
|
|
|
|
|
await trialLimiter?.track(tokensInfo)
|
|
|
|
|
await rateLimiter?.track()
|
|
|
|
|
const costInfo = await trackUsage(authInfo, modelInfo, providerInfo, billingSource, tokensInfo)
|
|
|
|
|
await reload(authInfo, costInfo)
|
|
|
|
|
return new Response(body, {
|
|
|
|
|
status: resStatus,
|
|
|
|
|
statusText: res.statusText,
|
|
|
|
|
@@ -226,12 +236,16 @@ export async function handler(
|
|
|
|
|
dataDumper?.flush()
|
|
|
|
|
await rateLimiter?.track()
|
|
|
|
|
const usage = usageParser.retrieve()
|
|
|
|
|
let cost = "0"
|
|
|
|
|
if (usage) {
|
|
|
|
|
const tokensInfo = providerInfo.normalizeUsage(usage)
|
|
|
|
|
await trialLimiter?.track(tokensInfo)
|
|
|
|
|
const costInfo = await trackUsage(authInfo, modelInfo, providerInfo, billingSource, tokensInfo)
|
|
|
|
|
await reload(authInfo, costInfo)
|
|
|
|
|
const usageInfo = providerInfo.normalizeUsage(usage)
|
|
|
|
|
const costInfo = calculateCost(modelInfo, usageInfo)
|
|
|
|
|
await trialLimiter?.track(usageInfo)
|
|
|
|
|
await trackUsage(billingSource, authInfo, modelInfo, providerInfo, usageInfo, costInfo)
|
|
|
|
|
await reload(billingSource, authInfo, costInfo)
|
|
|
|
|
cost = calculateOccuredCost(billingSource, costInfo)
|
|
|
|
|
}
|
|
|
|
|
c.enqueue(encoder.encode(usageParser.buidlCostChunk(cost)))
|
|
|
|
|
c.close()
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
@@ -283,7 +297,6 @@ export async function handler(
|
|
|
|
|
return pump()
|
|
|
|
|
},
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
return new Response(stream, {
|
|
|
|
|
status: resStatus,
|
|
|
|
|
statusText: res.statusText,
|
|
|
|
|
@@ -498,9 +511,9 @@ export async function handler(
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function validateBilling(authInfo: AuthInfo, modelInfo: ModelInfo) {
|
|
|
|
|
function validateBilling(authInfo: AuthInfo, modelInfo: ModelInfo): BillingSource {
|
|
|
|
|
if (!authInfo) return "anonymous"
|
|
|
|
|
if (authInfo.provider?.credentials) return "free"
|
|
|
|
|
if (authInfo.provider?.credentials) return "byok"
|
|
|
|
|
if (authInfo.isFree) return "free"
|
|
|
|
|
if (modelInfo.allowAnonymous) return "free"
|
|
|
|
|
|
|
|
|
|
@@ -613,13 +626,7 @@ export async function handler(
|
|
|
|
|
return res
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async function trackUsage(
|
|
|
|
|
authInfo: AuthInfo,
|
|
|
|
|
modelInfo: ModelInfo,
|
|
|
|
|
providerInfo: ProviderInfo,
|
|
|
|
|
billingSource: ReturnType<typeof validateBilling>,
|
|
|
|
|
usageInfo: UsageInfo,
|
|
|
|
|
) {
|
|
|
|
|
function calculateCost(modelInfo: ModelInfo, usageInfo: UsageInfo) {
|
|
|
|
|
const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWrite5mTokens, cacheWrite1hTokens } =
|
|
|
|
|
usageInfo
|
|
|
|
|
|
|
|
|
|
@@ -657,6 +664,33 @@ export async function handler(
|
|
|
|
|
(cacheReadCost ?? 0) +
|
|
|
|
|
(cacheWrite5mCost ?? 0) +
|
|
|
|
|
(cacheWrite1hCost ?? 0)
|
|
|
|
|
return {
|
|
|
|
|
totalCostInCent,
|
|
|
|
|
inputCost,
|
|
|
|
|
outputCost,
|
|
|
|
|
reasoningCost,
|
|
|
|
|
cacheReadCost,
|
|
|
|
|
cacheWrite5mCost,
|
|
|
|
|
cacheWrite1hCost,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
function calculateOccuredCost(billingSource: BillingSource, costInfo: CostInfo) {
|
|
|
|
|
return billingSource === "balance" ? (costInfo.totalCostInCent / 100).toFixed(8) : "0"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async function trackUsage(
|
|
|
|
|
billingSource: BillingSource,
|
|
|
|
|
authInfo: AuthInfo,
|
|
|
|
|
modelInfo: ModelInfo,
|
|
|
|
|
providerInfo: ProviderInfo,
|
|
|
|
|
usageInfo: UsageInfo,
|
|
|
|
|
costInfo: CostInfo,
|
|
|
|
|
) {
|
|
|
|
|
const { inputTokens, outputTokens, reasoningTokens, cacheReadTokens, cacheWrite5mTokens, cacheWrite1hTokens } =
|
|
|
|
|
usageInfo
|
|
|
|
|
const { totalCostInCent, inputCost, outputCost, reasoningCost, cacheReadCost, cacheWrite5mCost, cacheWrite1hCost } =
|
|
|
|
|
costInfo
|
|
|
|
|
|
|
|
|
|
logger.metric({
|
|
|
|
|
"tokens.input": inputTokens,
|
|
|
|
|
@@ -677,7 +711,7 @@ export async function handler(
|
|
|
|
|
if (billingSource === "anonymous") return
|
|
|
|
|
authInfo = authInfo!
|
|
|
|
|
|
|
|
|
|
const cost = authInfo.provider?.credentials ? 0 : centsToMicroCents(totalCostInCent)
|
|
|
|
|
const cost = centsToMicroCents(totalCostInCent)
|
|
|
|
|
await Database.use((db) =>
|
|
|
|
|
Promise.all([
|
|
|
|
|
db.insert(UsageTable).values({
|
|
|
|
|
@@ -772,16 +806,12 @@ export async function handler(
|
|
|
|
|
return { costInMicroCents: cost }
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
async function reload(authInfo: AuthInfo, costInfo: Awaited<ReturnType<typeof trackUsage>>) {
|
|
|
|
|
if (!authInfo) return
|
|
|
|
|
if (authInfo.isFree) return
|
|
|
|
|
if (authInfo.provider?.credentials) return
|
|
|
|
|
if (authInfo.subscription) return
|
|
|
|
|
|
|
|
|
|
if (!costInfo) return
|
|
|
|
|
async function reload(billingSource: BillingSource, authInfo: AuthInfo, costInfo: CostInfo) {
|
|
|
|
|
if (billingSource !== "balance") return
|
|
|
|
|
authInfo = authInfo!
|
|
|
|
|
|
|
|
|
|
const reloadTrigger = centsToMicroCents((authInfo.billing.reloadTrigger ?? Billing.RELOAD_TRIGGER) * 100)
|
|
|
|
|
if (authInfo.billing.balance - costInfo.costInMicroCents >= reloadTrigger) return
|
|
|
|
|
if (authInfo.billing.balance - costInfo.totalCostInCent >= reloadTrigger) return
|
|
|
|
|
if (authInfo.billing.timeReloadLockedTill && authInfo.billing.timeReloadLockedTill > new Date()) return
|
|
|
|
|
|
|
|
|
|
const lock = await Database.use((tx) =>
|
|
|
|
|
|