From 82f579459c951bb44f8f2be1480fc09c06b6db6d Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Tue, 24 Jun 2025 20:28:31 -0400 Subject: [PATCH 01/15] Get bytez integration working. --- src/globals.ts | 2 + src/handlers/handlerUtils.ts | 2 +- src/providers/bytez/api.ts | 18 ++++ src/providers/bytez/chatComplete.ts | 59 +++++++++++++ src/providers/bytez/index.ts | 126 ++++++++++++++++++++++++++++ src/providers/index.ts | 2 + src/public/index.html | 2 + src/start-server.ts | 2 +- src/utils.ts | 7 +- 9 files changed, 217 insertions(+), 3 deletions(-) create mode 100644 src/providers/bytez/api.ts create mode 100644 src/providers/bytez/chatComplete.ts create mode 100644 src/providers/bytez/index.ts diff --git a/src/globals.ts b/src/globals.ts index 83b4404da..6e76a99e3 100644 --- a/src/globals.ts +++ b/src/globals.ts @@ -95,6 +95,7 @@ export const LEPTON: string = 'lepton'; export const KLUSTER_AI: string = 'kluster-ai'; export const NSCALE: string = 'nscale'; export const HYPERBOLIC: string = 'hyperbolic'; +export const BYTEZ: string = 'bytez'; export const VALID_PROVIDERS = [ ANTHROPIC, @@ -155,6 +156,7 @@ export const VALID_PROVIDERS = [ KLUSTER_AI, NSCALE, HYPERBOLIC, + BYTEZ, ]; export const CONTENT_TYPES = { diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts index 7388d3e15..1ce731162 100644 --- a/src/handlers/handlerUtils.ts +++ b/src/handlers/handlerUtils.ts @@ -510,7 +510,7 @@ export async function tryPost( body: transformedRequestBody, headers: fetchOptions.headers, }, - requestParams: transformedRequestBody, + requestParams: { ...params, ...transformedRequestBody }, finalUntransformedRequest: { body: params, }, diff --git a/src/providers/bytez/api.ts b/src/providers/bytez/api.ts new file mode 100644 index 000000000..e95fb36ae --- /dev/null +++ b/src/providers/bytez/api.ts @@ -0,0 +1,18 @@ +import { ProviderAPIConfig } from '../types'; + +const BytezInferenceAPI: ProviderAPIConfig = { + getBaseURL: () => 'https://api.bytez.com', + headers: async ({ providerOptions }) => { + const { apiKey } = providerOptions; + + const headers: Record = {}; + + headers['Authorization'] = `Key ${apiKey}`; + + return headers; + }, + getEndpoint: ({ gatewayRequestBodyJSON: { version = 2, model } }) => + `/models/v${version}/${model}`, +}; + +export default BytezInferenceAPI; diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts new file mode 100644 index 000000000..f08331324 --- /dev/null +++ b/src/providers/bytez/chatComplete.ts @@ -0,0 +1,59 @@ +import { ProviderConfig } from '../types'; + +export const BytezInferenceChatCompleteConfig: ProviderConfig = { + messages: { + param: 'messages', + default: '', + }, + max_tokens: { + param: 'max_tokens', + default: 100, + min: 0, + }, + max_completion_tokens: { + param: 'max_tokens', + default: 100, + min: 0, + }, + temperature: { + param: 'temperature', + default: 1, + min: 0, + max: 2, + }, + top_p: { + param: 'top_p', + default: 1, + min: 0, + max: 1, + }, + stream: { + param: 'stream', + default: false, + }, + stop: { + param: 'stop', + }, + presence_penalty: { + param: 'presence_penalty', + min: -2, + max: 2, + }, + frequency_penalty: { + param: 'frequency_penalty', + min: -2, + max: 2, + }, + user: { + param: 'user', + }, + tools: { + param: 'tools', + }, + tool_choice: { + param: 'tool_choice', + }, + response_format: { + param: 'response_format', + }, +}; diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts new file mode 100644 index 000000000..f629b352a --- /dev/null +++ b/src/providers/bytez/index.ts @@ -0,0 +1,126 @@ +import crypto from 'node:crypto'; +import { ProviderConfigs } from '../types'; +import BytezInferenceAPI from './api'; +import { BytezInferenceChatCompleteConfig } from './chatComplete'; + +const BASE_URL = 'https://api.bytez.com/models/v2'; + +const BytezInferenceAPIConfig: ProviderConfigs = { + api: BytezInferenceAPI, + chatComplete: BytezInferenceChatCompleteConfig, + requestHandlers: { + chatComplete: async ({ providerOptions, requestBody }) => { + const skipProps: Record = { + model: true, + }; + + const reservedProps: Record = { + stream: true, + messages: true, + text: true, + }; + + const adaptedBody: Record = {}; + const params: Record = {}; + + for (const [key, value] of Object.entries(requestBody)) { + if (skipProps[key]) { + continue; + } + + if (reservedProps[key]) { + adaptedBody[key] = value; + continue; + } + + params[key] = value; + } + + adaptedBody.params = paramsAdapter(params); + + const url = `${BASE_URL}/${requestBody.model}`; + + const response = await fetch(url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Key ${providerOptions.apiKey}`, + }, + body: JSON.stringify(adaptedBody), + }); + + if (adaptedBody.stream) { + return new Response(response.body, response); + } + + const { error, output }: { error: string | null; output: object | null } = + await response.json(); + + if (error) { + return new Response( + JSON.stringify({ + // + message: error, + }), + response + ); + } + + return new Response( + JSON.stringify({ + id: crypto.randomUUID(), + object: 'chat.completion', + created: Date.now(), + model: requestBody.model, + choices: [ + { + index: 0, + message: output, + logprobs: null, + finish_reason: 'stop', + }, + ], + usage: { + inferenceTime: response.headers.get('inference-time'), + modelSize: response.headers.get('inference-meter'), + // prompt_tokens: 11, + // completion_tokens: 28, + // total_tokens: 39, + // prompt_tokens_details: { + // cached_tokens: 0, + // audio_tokens: 0, + // }, + // completion_tokens_details: { + // reasoning_tokens: 0, + // audio_tokens: 0, + // accepted_prediction_tokens: 0, + // rejected_prediction_tokens: 0, + // }, + }, + // service_tier: 'default', + // system_fingerprint: 'fp_34a54ae93c', + }), + response + ); + }, + }, +}; + +function paramsAdapter(params: Record) { + const aliasMap: Record = { + max_tokens: 'max_new_tokens', + }; + + for (const key of Object.keys(params)) { + const alias = aliasMap[key]; + + if (alias) { + params[alias] = params[key]; + delete params[key]; + } + } + + return params; +} + +export default BytezInferenceAPIConfig; diff --git a/src/providers/index.ts b/src/providers/index.ts index 11ef1738f..e8e593e8c 100644 --- a/src/providers/index.ts +++ b/src/providers/index.ts @@ -1,3 +1,4 @@ +import BytezConfig from './bytez'; import AI21Config from './ai21'; import AnthropicConfig from './anthropic'; import AnyscaleConfig from './anyscale'; @@ -118,6 +119,7 @@ const Providers: { [key: string]: ProviderConfigs } = { 'kluster-ai': KlusterAIConfig, nscale: NscaleConfig, hyperbolic: HyperbolicConfig, + bytez: BytezConfig, }; export default Providers; diff --git a/src/public/index.html b/src/public/index.html index d98127c37..29901baa6 100644 --- a/src/public/index.html +++ b/src/public/index.html @@ -1106,6 +1106,7 @@

Select Provider

- @@ -1116,6 +1115,7 @@

Select Provider

+
@@ -1457,7 +1457,6 @@

Enter API Key

}; const modelMap = { - "bytez": "google/gemma-3-1b-it", "openai": "gpt-4o-mini", "anthropic": "claude-3-5-sonnet-20240620", "groq": "llama3-70b-8192", @@ -1467,6 +1466,7 @@

Enter API Key

"together-ai": "llama-3.1-8b-instruct", "perplexity-ai": "pplx-7b-online", "mistral-ai": "mistral-small-latest", + "bytez": "google/gemma-3-1b-it", "others": "gpt-4o-mini" } From f9d845602647ffb9c8aa6f31c00157886a3e5042 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Wed, 25 Jun 2025 15:15:09 -0400 Subject: [PATCH 05/15] Add comments to handler utils change for spreading props into requestParams in the context's requestOptions. --- src/handlers/handlerUtils.ts | 7 ++++++- src/start-server.ts | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts index 1ce731162..a1abd1292 100644 --- a/src/handlers/handlerUtils.ts +++ b/src/handlers/handlerUtils.ts @@ -510,7 +510,12 @@ export async function tryPost( body: transformedRequestBody, headers: fetchOptions.headers, }, - requestParams: { ...params, ...transformedRequestBody }, + requestParams: { + // in the event transformedRequestBody request is empty + ...params, + // if this is populated, we will overwrite whatever was initially in params + ...transformedRequestBody, + }, finalUntransformedRequest: { body: params, }, diff --git a/src/start-server.ts b/src/start-server.ts index b12bd2bc7..f58da4231 100644 --- a/src/start-server.ts +++ b/src/start-server.ts @@ -33,7 +33,7 @@ if ( const scriptDir = dirname(fileURLToPath(import.meta.url)); // Serve the index.html content directly for both routes - const indexPath = join(`${scriptDir}/../`, 'public/index.html'); + const indexPath = join(scriptDir, 'public/index.html'); const indexContent = readFileSync(indexPath, 'utf-8'); const serveIndex = (c: Context) => { From 25277c4fe2c44da95364c4f0a643cc56fa51a02a Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Wed, 25 Jun 2025 15:16:26 -0400 Subject: [PATCH 06/15] Expand on comments in handerUtils.ts --- src/handlers/handlerUtils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts index a1abd1292..9584a8a7b 100644 --- a/src/handlers/handlerUtils.ts +++ b/src/handlers/handlerUtils.ts @@ -511,7 +511,7 @@ export async function tryPost( headers: fetchOptions.headers, }, requestParams: { - // in the event transformedRequestBody request is empty + // in the event transformedRequestBody request is empty, e.g. you have opted to handle requests via a custom requestHandler ...params, // if this is populated, we will overwrite whatever was initially in params ...transformedRequestBody, From d41c9b27126dec45c4fdd24d677d4192766bdf97 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Wed, 25 Jun 2025 15:18:36 -0400 Subject: [PATCH 07/15] Expand on comments again. --- src/providers/bytez/chatComplete.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts index b7dd470cd..e7865a66c 100644 --- a/src/providers/bytez/chatComplete.ts +++ b/src/providers/bytez/chatComplete.ts @@ -7,6 +7,7 @@ const BytezInferenceChatCompleteConfig: ProviderConfig = { }, max_tokens: { // NOTE param acts as an alias, it will be added to "params" on the req body + // we do this adaptation ourselves in our custom requestHandler. See src/providers/bytez/index.ts param: 'max_new_tokens', default: 100, min: 0, From 35456b55ad9c7dad456f18803c2f03e2a288f0c3 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Thu, 26 Jun 2025 14:41:29 -0400 Subject: [PATCH 08/15] Add the LRU cache. --- src/providers/bytez/index.ts | 62 +++++++++++++++++++++++++++++++++--- 1 file changed, 58 insertions(+), 4 deletions(-) diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts index 43be7ea35..ed09715b9 100644 --- a/src/providers/bytez/index.ts +++ b/src/providers/bytez/index.ts @@ -5,7 +5,61 @@ import { BytezInferenceChatCompleteConfig } from './chatComplete'; const BASE_URL = 'https://api.bytez.com/models/v2'; -const IS_CHAT_MODEL_CACHE: Record = {}; +class LRUCache { + private size: number; + private map: Map; + + constructor({ size = 100 } = {}) { + this.size = size; + this.map = new Map(); + } + + get(key: K): V | undefined { + if (!this.map.has(key)) return undefined; + + // Move the key to the end to mark it as recently used + const value = this.map.get(key)!; + this.map.delete(key); + this.map.set(key, value); + return value; + } + + set(key: K, value: V): void { + if (this.map.has(key)) { + // Remove the old value to update position + this.map.delete(key); + } else if (this.map.size >= this.size) { + // Remove least recently used (first item in Map) + const lruKey: any = this.map.keys().next().value; + this.map.delete(lruKey); + } + + // Insert the new key-value as most recently used + this.map.set(key, value); + } + + has(key: K): boolean { + return this.map.has(key); + } + + delete(key: K): boolean { + return this.map.delete(key); + } + + keys(): IterableIterator { + return this.map.keys(); + } + + values(): IterableIterator { + return this.map.values(); + } + + get length(): number { + return this.map.size; + } +} + +const IS_CHAT_MODEL_CACHE = new LRUCache({ size: 100 }); const BytezInferenceAPIConfig: ProviderConfigs = { api: BytezInferenceAPI, @@ -205,8 +259,8 @@ async function validateModelIsChat( headers: Record ) { // return from cache if already validated - if (IS_CHAT_MODEL_CACHE[modelId]) { - return IS_CHAT_MODEL_CACHE[modelId]; + if (IS_CHAT_MODEL_CACHE.has(modelId)) { + return IS_CHAT_MODEL_CACHE.get(modelId); } const url = `${BASE_URL}/list/models?modelId=${modelId}`; @@ -226,7 +280,7 @@ async function validateModelIsChat( const isChatModel = model.task === 'chat'; - IS_CHAT_MODEL_CACHE[modelId] = isChatModel; + IS_CHAT_MODEL_CACHE.set(modelId, isChatModel); return isChatModel; } From ed214e81cc52313f1c7eeba806b3cc849d88fb35 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Thu, 26 Jun 2025 15:09:36 -0400 Subject: [PATCH 09/15] Tidy up PR. --- src/providers/bytez/index.ts | 216 +++++++++++------------------------ src/providers/bytez/types.ts | 11 ++ src/providers/bytez/utils.ts | 55 +++++++++ 3 files changed, 130 insertions(+), 152 deletions(-) create mode 100644 src/providers/bytez/types.ts create mode 100644 src/providers/bytez/utils.ts diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts index ed09715b9..d87e0f53a 100644 --- a/src/providers/bytez/index.ts +++ b/src/providers/bytez/index.ts @@ -2,63 +2,11 @@ import crypto from 'node:crypto'; import { ParameterConfig, ProviderConfigs } from '../types'; import BytezInferenceAPI from './api'; import { BytezInferenceChatCompleteConfig } from './chatComplete'; +import { LRUCache } from './utils'; +import { BytezResponse } from './types'; const BASE_URL = 'https://api.bytez.com/models/v2'; -class LRUCache { - private size: number; - private map: Map; - - constructor({ size = 100 } = {}) { - this.size = size; - this.map = new Map(); - } - - get(key: K): V | undefined { - if (!this.map.has(key)) return undefined; - - // Move the key to the end to mark it as recently used - const value = this.map.get(key)!; - this.map.delete(key); - this.map.set(key, value); - return value; - } - - set(key: K, value: V): void { - if (this.map.has(key)) { - // Remove the old value to update position - this.map.delete(key); - } else if (this.map.size >= this.size) { - // Remove least recently used (first item in Map) - const lruKey: any = this.map.keys().next().value; - this.map.delete(lruKey); - } - - // Insert the new key-value as most recently used - this.map.set(key, value); - } - - has(key: K): boolean { - return this.map.has(key); - } - - delete(key: K): boolean { - return this.map.delete(key); - } - - keys(): IterableIterator { - return this.map.keys(); - } - - values(): IterableIterator { - return this.map.values(); - } - - get length(): number { - return this.map.size; - } -} - const IS_CHAT_MODEL_CACHE = new LRUCache({ size: 100 }); const BytezInferenceAPIConfig: ProviderConfigs = { @@ -66,114 +14,88 @@ const BytezInferenceAPIConfig: ProviderConfigs = { chatComplete: BytezInferenceChatCompleteConfig, requestHandlers: { chatComplete: async ({ providerOptions, requestBody }) => { - const { model: modelId } = requestBody; + try { + const { model: modelId } = requestBody; - let adaptedBody; + const adaptedBody = bodyAdapter(requestBody); - try { - adaptedBody = bodyAdapter(requestBody); - } catch (error: any) { - return new Response( - JSON.stringify({ - status: 'failure', - message: error.message, - }), - { - status: 500, - headers: { - 'content-type': 'application/json', - }, - } - ); - } + const headers = { + 'Content-Type': 'application/json', + Authorization: `Key ${providerOptions.apiKey}`, + }; - const headers = { - 'Content-Type': 'application/json', - Authorization: `Key ${providerOptions.apiKey}`, - }; + const isChatModel = await validateModelIsChat(modelId, headers); - const isChatModel = await validateModelIsChat(modelId, headers); + if (!isChatModel) { + return constructFailureResponse( + 'Bytez only supports chat models on PortKey', + { status: 400 } + ); + } - if (!isChatModel) { - return new Response( - JSON.stringify({ - status: 'failure', - message: 'Bytez only supports chat models on PortKey', - }), - { - status: 500, - headers: { - 'content-type': 'application/json', - }, - } - ); - } + const url = `${BASE_URL}/${modelId}`; - const url = `${BASE_URL}/${modelId}`; + const response = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify(adaptedBody), + }); - const response = await fetch(url, { - method: 'POST', - headers, - body: JSON.stringify(adaptedBody), - }); + if (adaptedBody.stream) { + return new Response(response.body, response); + } - if (adaptedBody.stream) { - return new Response(response.body, response); - } + const { error, output }: BytezResponse = await response.json(); - const { error, output }: { error: string | null; output: object | null } = - await response.json(); + if (error) { + return constructFailureResponse(error, response); + } - if (error) { return new Response( JSON.stringify({ - // - message: error, + id: crypto.randomUUID(), + object: 'chat.completion', + created: Date.now(), + model: modelId, + choices: [ + { + index: 0, + message: output, + logprobs: null, + finish_reason: 'stop', + }, + ], + usage: { + inferenceTime: response.headers.get('inference-time'), + modelSize: response.headers.get('inference-meter'), + }, }), response ); + } catch (error: any) { + return constructFailureResponse(error.message); } - - return new Response( - JSON.stringify({ - id: crypto.randomUUID(), - object: 'chat.completion', - created: Date.now(), - model: modelId, - choices: [ - { - index: 0, - message: output, - logprobs: null, - finish_reason: 'stop', - }, - ], - usage: { - inferenceTime: response.headers.get('inference-time'), - modelSize: response.headers.get('inference-meter'), - // prompt_tokens: 11, - // completion_tokens: 28, - // total_tokens: 39, - // prompt_tokens_details: { - // cached_tokens: 0, - // audio_tokens: 0, - // }, - // completion_tokens_details: { - // reasoning_tokens: 0, - // audio_tokens: 0, - // accepted_prediction_tokens: 0, - // rejected_prediction_tokens: 0, - // }, - }, - // service_tier: 'default', - // system_fingerprint: 'fp_34a54ae93c', - }), - response - ); }, }, }; +function constructFailureResponse(message: string, response?: object) { + return new Response( + JSON.stringify({ + status: 'failure', + message, + }), + { + status: 500, + headers: { + 'content-type': 'application/json', + }, + // override defaults if desired + ...response, + } + ); +} + function bodyAdapter(requestBody: Record) { for (const [param, paramConfig] of Object.entries( BytezInferenceChatCompleteConfig @@ -244,16 +166,6 @@ function bodyAdapter(requestBody: Record) { return adaptedBody; } -interface Model { - task: string; -} - -interface BytezResponse { - error: string; - output: Model[]; - // add other model properties as needed -} - async function validateModelIsChat( modelId: string, headers: Record diff --git a/src/providers/bytez/types.ts b/src/providers/bytez/types.ts new file mode 100644 index 000000000..58a586abe --- /dev/null +++ b/src/providers/bytez/types.ts @@ -0,0 +1,11 @@ +interface Model { + task: string; +} + +interface BytezResponse { + error: string; + output: Model[]; + // add other model properties as needed +} + +export { Model, BytezResponse }; diff --git a/src/providers/bytez/utils.ts b/src/providers/bytez/utils.ts new file mode 100644 index 000000000..27af31b5b --- /dev/null +++ b/src/providers/bytez/utils.ts @@ -0,0 +1,55 @@ +class LRUCache { + private size: number; + private map: Map; + + constructor({ size = 100 } = {}) { + this.size = size; + this.map = new Map(); + } + + get(key: K): V | undefined { + if (!this.map.has(key)) return undefined; + + // Move the key to the end to mark it as recently used + const value = this.map.get(key)!; + this.map.delete(key); + this.map.set(key, value); + return value; + } + + set(key: K, value: V): void { + if (this.map.has(key)) { + // Remove the old value to update position + this.map.delete(key); + } else if (this.map.size >= this.size) { + // Remove least recently used (first item in Map) + const lruKey: any = this.map.keys().next().value; + this.map.delete(lruKey); + } + + // Insert the new key-value as most recently used + this.map.set(key, value); + } + + has(key: K): boolean { + return this.map.has(key); + } + + delete(key: K): boolean { + return this.map.delete(key); + } + + keys(): IterableIterator { + return this.map.keys(); + } + + values(): IterableIterator { + return this.map.values(); + } + + get length(): number { + return this.map.size; + } +} + +export { LRUCache }; From c60b3673cca2bb39c2a1d1bd4ca39df7012d2143 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Thu, 26 Jun 2025 15:21:14 -0400 Subject: [PATCH 10/15] Final bit of tidying the bytez impl. --- src/providers/bytez/index.ts | 108 ++++++----------------------------- src/providers/bytez/utils.ts | 76 +++++++++++++++++++++++- 2 files changed, 94 insertions(+), 90 deletions(-) diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts index d87e0f53a..982823c45 100644 --- a/src/providers/bytez/index.ts +++ b/src/providers/bytez/index.ts @@ -1,8 +1,8 @@ import crypto from 'node:crypto'; -import { ParameterConfig, ProviderConfigs } from '../types'; +import { ProviderConfigs } from '../types'; import BytezInferenceAPI from './api'; import { BytezInferenceChatCompleteConfig } from './chatComplete'; -import { LRUCache } from './utils'; +import { bodyAdapter, LRUCache } from './utils'; import { BytezResponse } from './types'; const BASE_URL = 'https://api.bytez.com/models/v2'; @@ -79,93 +79,6 @@ const BytezInferenceAPIConfig: ProviderConfigs = { }, }; -function constructFailureResponse(message: string, response?: object) { - return new Response( - JSON.stringify({ - status: 'failure', - message, - }), - { - status: 500, - headers: { - 'content-type': 'application/json', - }, - // override defaults if desired - ...response, - } - ); -} - -function bodyAdapter(requestBody: Record) { - for (const [param, paramConfig] of Object.entries( - BytezInferenceChatCompleteConfig - )) { - const hasParam = Boolean(requestBody[param]); - - // first assign defaults - if (!hasParam) { - const { default: defaultValue, required } = - paramConfig as ParameterConfig; - - // if it's required, throw - if (required) { - throw new Error(`Param ${param} is required`); - } - - // assign the default value - if (defaultValue !== undefined && requestBody[param] === undefined) { - requestBody[param] = defaultValue; - } - } - } - - // now we remap everything that has an alias, i.e. "prop" on propConfig - for (const [key, value] of Object.entries(requestBody)) { - const paramObj = BytezInferenceChatCompleteConfig[key] as - | ParameterConfig - | undefined; - - if (paramObj) { - const { param } = paramObj; - - if (key !== param) { - requestBody[param] = requestBody[key]; - delete requestBody[key]; - } - } - } - - // now we adapt to the bytez input signature - // props to skip - const skipProps: Record = { - model: true, - }; - - // props that cannot be removed from the body - const reservedProps: Record = { - stream: true, - messages: true, - }; - const adaptedBody: Record = { params: {} }; - - for (const [key, value] of Object.entries(requestBody)) { - // things like "model" - if (skipProps[key]) { - continue; - } - - // things like "messages", "stream" - if (reservedProps[key]) { - adaptedBody[key] = value; - continue; - } - // anything else, e.g. max_new_tokens - adaptedBody.params[key] = value; - } - - return adaptedBody; -} - async function validateModelIsChat( modelId: string, headers: Record @@ -197,4 +110,21 @@ async function validateModelIsChat( return isChatModel; } +function constructFailureResponse(message: string, response?: object) { + return new Response( + JSON.stringify({ + status: 'failure', + message, + }), + { + status: 500, + headers: { + 'content-type': 'application/json', + }, + // override defaults if desired + ...response, + } + ); +} + export default BytezInferenceAPIConfig; diff --git a/src/providers/bytez/utils.ts b/src/providers/bytez/utils.ts index 27af31b5b..5f0559a9f 100644 --- a/src/providers/bytez/utils.ts +++ b/src/providers/bytez/utils.ts @@ -1,3 +1,6 @@ +import { ParameterConfig } from '../types'; +import { BytezInferenceChatCompleteConfig } from './chatComplete'; + class LRUCache { private size: number; private map: Map; @@ -52,4 +55,75 @@ class LRUCache { } } -export { LRUCache }; +function bodyAdapter(requestBody: Record) { + for (const [param, paramConfig] of Object.entries( + BytezInferenceChatCompleteConfig + )) { + const hasParam = Boolean(requestBody[param]); + + // first assign defaults + if (!hasParam) { + const { default: defaultValue, required } = + paramConfig as ParameterConfig; + + // if it's required, throw + if (required) { + throw new Error(`Param ${param} is required`); + } + + // assign the default value + if (defaultValue !== undefined && requestBody[param] === undefined) { + requestBody[param] = defaultValue; + } + } + } + + // now we remap everything that has an alias, i.e. "prop" on propConfig + for (const key of Object.keys(requestBody)) { + const paramObj = BytezInferenceChatCompleteConfig[key] as + | ParameterConfig + | undefined; + + if (paramObj) { + const { param: alias } = paramObj; + + if (key !== alias) { + requestBody[alias] = requestBody[key]; + delete requestBody[key]; + } + } + } + + // now we adapt to the bytez input signature + // props to skip + const skipProps: Record = { + model: true, + }; + + // props that cannot be removed from the body + const reservedProps: Record = { + stream: true, + messages: true, + }; + + const adaptedBody: Record = { params: {} }; + + for (const [key, value] of Object.entries(requestBody)) { + // things like "model" + if (skipProps[key]) { + continue; + } + + // things like "messages", "stream" + if (reservedProps[key]) { + adaptedBody[key] = value; + continue; + } + // anything else, e.g. max_new_tokens + adaptedBody.params[key] = value; + } + + return adaptedBody; +} + +export { LRUCache, bodyAdapter }; From d121025b9a18ecb4f98c8b176c60bf0ef937bc19 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Thu, 3 Jul 2025 13:40:32 -0400 Subject: [PATCH 11/15] Remove explicit cypto import. --- src/providers/bytez/index.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts index 982823c45..1991b20e9 100644 --- a/src/providers/bytez/index.ts +++ b/src/providers/bytez/index.ts @@ -1,4 +1,3 @@ -import crypto from 'node:crypto'; import { ProviderConfigs } from '../types'; import BytezInferenceAPI from './api'; import { BytezInferenceChatCompleteConfig } from './chatComplete'; From 99bff2d3c99d3efe421d202df00ca66c8f7c16c2 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Thu, 3 Jul 2025 13:41:20 -0400 Subject: [PATCH 12/15] Remove comment from bytez types. --- src/providers/bytez/types.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/providers/bytez/types.ts b/src/providers/bytez/types.ts index 58a586abe..1d640ea8b 100644 --- a/src/providers/bytez/types.ts +++ b/src/providers/bytez/types.ts @@ -5,7 +5,6 @@ interface Model { interface BytezResponse { error: string; output: Model[]; - // add other model properties as needed } export { Model, BytezResponse }; From 86aae1e74da461f3707dee250f78ab11c42a93c7 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Wed, 9 Jul 2025 14:44:33 -0400 Subject: [PATCH 13/15] Make Bytez impl conform to the transform spec per maintainer feedback. --- src/handlers/handlerUtils.ts | 7 +- src/providers/bytez/api.ts | 2 + src/providers/bytez/chatComplete.ts | 8 +- src/providers/bytez/index.ts | 155 ++++++++-------------------- src/providers/bytez/utils.ts | 129 ----------------------- 5 files changed, 47 insertions(+), 254 deletions(-) delete mode 100644 src/providers/bytez/utils.ts diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts index 9584a8a7b..7388d3e15 100644 --- a/src/handlers/handlerUtils.ts +++ b/src/handlers/handlerUtils.ts @@ -510,12 +510,7 @@ export async function tryPost( body: transformedRequestBody, headers: fetchOptions.headers, }, - requestParams: { - // in the event transformedRequestBody request is empty, e.g. you have opted to handle requests via a custom requestHandler - ...params, - // if this is populated, we will overwrite whatever was initially in params - ...transformedRequestBody, - }, + requestParams: transformedRequestBody, finalUntransformedRequest: { body: params, }, diff --git a/src/providers/bytez/api.ts b/src/providers/bytez/api.ts index e95fb36ae..ee86b3231 100644 --- a/src/providers/bytez/api.ts +++ b/src/providers/bytez/api.ts @@ -1,4 +1,5 @@ import { ProviderAPIConfig } from '../types'; +import { version } from '../../../package.json'; const BytezInferenceAPI: ProviderAPIConfig = { getBaseURL: () => 'https://api.bytez.com', @@ -8,6 +9,7 @@ const BytezInferenceAPI: ProviderAPIConfig = { const headers: Record = {}; headers['Authorization'] = `Key ${apiKey}`; + headers['user-agent'] = `portkey-${version}`; return headers; }, diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts index e7865a66c..519e0a8b4 100644 --- a/src/providers/bytez/chatComplete.ts +++ b/src/providers/bytez/chatComplete.ts @@ -6,20 +6,18 @@ const BytezInferenceChatCompleteConfig: ProviderConfig = { required: true, }, max_tokens: { - // NOTE param acts as an alias, it will be added to "params" on the req body - // we do this adaptation ourselves in our custom requestHandler. See src/providers/bytez/index.ts - param: 'max_new_tokens', + param: 'params.max_new_tokens', default: 100, min: 0, }, temperature: { - param: 'temperature', + param: 'params.temperature', default: 1, min: 0, max: 2, }, top_p: { - param: 'top_p', + param: 'params.top_p', default: 1, min: 0, max: 1, diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts index 1991b20e9..581d2ee1e 100644 --- a/src/providers/bytez/index.ts +++ b/src/providers/bytez/index.ts @@ -1,129 +1,56 @@ +import { BYTEZ } from '../../globals'; import { ProviderConfigs } from '../types'; +import { generateErrorResponse } from '../utils'; import BytezInferenceAPI from './api'; import { BytezInferenceChatCompleteConfig } from './chatComplete'; -import { bodyAdapter, LRUCache } from './utils'; import { BytezResponse } from './types'; -const BASE_URL = 'https://api.bytez.com/models/v2'; - -const IS_CHAT_MODEL_CACHE = new LRUCache({ size: 100 }); - const BytezInferenceAPIConfig: ProviderConfigs = { api: BytezInferenceAPI, chatComplete: BytezInferenceChatCompleteConfig, - requestHandlers: { - chatComplete: async ({ providerOptions, requestBody }) => { - try { - const { model: modelId } = requestBody; - - const adaptedBody = bodyAdapter(requestBody); - - const headers = { - 'Content-Type': 'application/json', - Authorization: `Key ${providerOptions.apiKey}`, - }; - - const isChatModel = await validateModelIsChat(modelId, headers); - - if (!isChatModel) { - return constructFailureResponse( - 'Bytez only supports chat models on PortKey', - { status: 400 } - ); - } - - const url = `${BASE_URL}/${modelId}`; - - const response = await fetch(url, { - method: 'POST', - headers, - body: JSON.stringify(adaptedBody), - }); - - if (adaptedBody.stream) { - return new Response(response.body, response); - } - - const { error, output }: BytezResponse = await response.json(); - - if (error) { - return constructFailureResponse(error, response); - } - - return new Response( - JSON.stringify({ - id: crypto.randomUUID(), - object: 'chat.completion', - created: Date.now(), - model: modelId, - choices: [ - { - index: 0, - message: output, - logprobs: null, - finish_reason: 'stop', - }, - ], - usage: { - inferenceTime: response.headers.get('inference-time'), - modelSize: response.headers.get('inference-meter'), - }, - }), - response + responseTransforms: { + chatComplete: ( + response: BytezResponse, + responseStatus: number, + responseHeaders: any, + strictOpenAiCompliance: boolean, + endpoint: string, + requestBody: any + ) => { + const { error, output } = response; + + if (error) { + return generateErrorResponse( + { + message: error, + type: String(responseStatus), + param: null, + code: null, + }, + BYTEZ ); - } catch (error: any) { - return constructFailureResponse(error.message); } + + return { + id: crypto.randomUUID(), + object: 'chat.completion', + created: Date.now(), + model: requestBody.model, + choices: [ + { + index: 0, + message: output, + logprobs: null, + finish_reason: 'stop', + }, + ], + usage: { + inferenceTime: responseHeaders.get('inference-time'), + modelSize: responseHeaders.get('inference-meter'), + }, + }; }, }, }; -async function validateModelIsChat( - modelId: string, - headers: Record -) { - // return from cache if already validated - if (IS_CHAT_MODEL_CACHE.has(modelId)) { - return IS_CHAT_MODEL_CACHE.get(modelId); - } - - const url = `${BASE_URL}/list/models?modelId=${modelId}`; - - const response = await fetch(url, { - headers, - }); - - const { - error, - output: [model], - }: BytezResponse = await response.json(); - - if (error) { - throw new Error(error); - } - - const isChatModel = model.task === 'chat'; - - IS_CHAT_MODEL_CACHE.set(modelId, isChatModel); - - return isChatModel; -} - -function constructFailureResponse(message: string, response?: object) { - return new Response( - JSON.stringify({ - status: 'failure', - message, - }), - { - status: 500, - headers: { - 'content-type': 'application/json', - }, - // override defaults if desired - ...response, - } - ); -} - export default BytezInferenceAPIConfig; diff --git a/src/providers/bytez/utils.ts b/src/providers/bytez/utils.ts deleted file mode 100644 index 5f0559a9f..000000000 --- a/src/providers/bytez/utils.ts +++ /dev/null @@ -1,129 +0,0 @@ -import { ParameterConfig } from '../types'; -import { BytezInferenceChatCompleteConfig } from './chatComplete'; - -class LRUCache { - private size: number; - private map: Map; - - constructor({ size = 100 } = {}) { - this.size = size; - this.map = new Map(); - } - - get(key: K): V | undefined { - if (!this.map.has(key)) return undefined; - - // Move the key to the end to mark it as recently used - const value = this.map.get(key)!; - this.map.delete(key); - this.map.set(key, value); - return value; - } - - set(key: K, value: V): void { - if (this.map.has(key)) { - // Remove the old value to update position - this.map.delete(key); - } else if (this.map.size >= this.size) { - // Remove least recently used (first item in Map) - const lruKey: any = this.map.keys().next().value; - this.map.delete(lruKey); - } - - // Insert the new key-value as most recently used - this.map.set(key, value); - } - - has(key: K): boolean { - return this.map.has(key); - } - - delete(key: K): boolean { - return this.map.delete(key); - } - - keys(): IterableIterator { - return this.map.keys(); - } - - values(): IterableIterator { - return this.map.values(); - } - - get length(): number { - return this.map.size; - } -} - -function bodyAdapter(requestBody: Record) { - for (const [param, paramConfig] of Object.entries( - BytezInferenceChatCompleteConfig - )) { - const hasParam = Boolean(requestBody[param]); - - // first assign defaults - if (!hasParam) { - const { default: defaultValue, required } = - paramConfig as ParameterConfig; - - // if it's required, throw - if (required) { - throw new Error(`Param ${param} is required`); - } - - // assign the default value - if (defaultValue !== undefined && requestBody[param] === undefined) { - requestBody[param] = defaultValue; - } - } - } - - // now we remap everything that has an alias, i.e. "prop" on propConfig - for (const key of Object.keys(requestBody)) { - const paramObj = BytezInferenceChatCompleteConfig[key] as - | ParameterConfig - | undefined; - - if (paramObj) { - const { param: alias } = paramObj; - - if (key !== alias) { - requestBody[alias] = requestBody[key]; - delete requestBody[key]; - } - } - } - - // now we adapt to the bytez input signature - // props to skip - const skipProps: Record = { - model: true, - }; - - // props that cannot be removed from the body - const reservedProps: Record = { - stream: true, - messages: true, - }; - - const adaptedBody: Record = { params: {} }; - - for (const [key, value] of Object.entries(requestBody)) { - // things like "model" - if (skipProps[key]) { - continue; - } - - // things like "messages", "stream" - if (reservedProps[key]) { - adaptedBody[key] = value; - continue; - } - // anything else, e.g. max_new_tokens - adaptedBody.params[key] = value; - } - - return adaptedBody; -} - -export { LRUCache, bodyAdapter }; From a6070dc052a49f1b4b05f14ec46fe79beb5645a6 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Wed, 9 Jul 2025 15:08:03 -0400 Subject: [PATCH 14/15] Update user agent string for bytez provider config. --- src/providers/bytez/api.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/providers/bytez/api.ts b/src/providers/bytez/api.ts index ee86b3231..c54ab546e 100644 --- a/src/providers/bytez/api.ts +++ b/src/providers/bytez/api.ts @@ -9,7 +9,7 @@ const BytezInferenceAPI: ProviderAPIConfig = { const headers: Record = {}; headers['Authorization'] = `Key ${apiKey}`; - headers['user-agent'] = `portkey-${version}`; + headers['user-agent'] = `portkey/${version}`; return headers; }, From 73f5c05cb37ecf9e29333023c8cb5dcc8f1a7cd7 Mon Sep 17 00:00:00 2001 From: Aaron Vogler Date: Fri, 11 Jul 2025 14:48:37 -0400 Subject: [PATCH 15/15] Move bytez chatComplete responseTransform into chatComplete.ts, conform to openai compliant spec. --- src/providers/bytez/chatComplete.ts | 48 ++++++++++++++++++++++++++++- src/providers/bytez/index.ts | 47 ++-------------------------- 2 files changed, 49 insertions(+), 46 deletions(-) diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts index 519e0a8b4..5c2c1756f 100644 --- a/src/providers/bytez/chatComplete.ts +++ b/src/providers/bytez/chatComplete.ts @@ -1,4 +1,7 @@ +import { BYTEZ } from '../../globals'; import { ProviderConfig } from '../types'; +import { BytezResponse } from './types'; +import { generateErrorResponse } from '../utils'; const BytezInferenceChatCompleteConfig: ProviderConfig = { messages: { @@ -28,4 +31,47 @@ const BytezInferenceChatCompleteConfig: ProviderConfig = { }, }; -export { BytezInferenceChatCompleteConfig }; +function chatComplete( + response: BytezResponse, + responseStatus: number, + responseHeaders: any, + strictOpenAiCompliance: boolean, + endpoint: string, + requestBody: any +) { + const { error, output } = response; + + if (error) { + return generateErrorResponse( + { + message: error, + type: String(responseStatus), + param: null, + code: null, + }, + BYTEZ + ); + } + + return { + id: crypto.randomUUID(), + object: 'chat.completion', + created: Date.now(), + model: requestBody.model, + choices: [ + { + index: 0, + message: output, + logprobs: null, + finish_reason: 'stop', + }, + ], + usage: { + completion_tokens: -1, + prompt_tokens: -1, + total_tokens: -1, + }, + }; +} + +export { BytezInferenceChatCompleteConfig, chatComplete }; diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts index 581d2ee1e..2b1782bec 100644 --- a/src/providers/bytez/index.ts +++ b/src/providers/bytez/index.ts @@ -1,55 +1,12 @@ -import { BYTEZ } from '../../globals'; import { ProviderConfigs } from '../types'; -import { generateErrorResponse } from '../utils'; import BytezInferenceAPI from './api'; -import { BytezInferenceChatCompleteConfig } from './chatComplete'; -import { BytezResponse } from './types'; +import { BytezInferenceChatCompleteConfig, chatComplete } from './chatComplete'; const BytezInferenceAPIConfig: ProviderConfigs = { api: BytezInferenceAPI, chatComplete: BytezInferenceChatCompleteConfig, responseTransforms: { - chatComplete: ( - response: BytezResponse, - responseStatus: number, - responseHeaders: any, - strictOpenAiCompliance: boolean, - endpoint: string, - requestBody: any - ) => { - const { error, output } = response; - - if (error) { - return generateErrorResponse( - { - message: error, - type: String(responseStatus), - param: null, - code: null, - }, - BYTEZ - ); - } - - return { - id: crypto.randomUUID(), - object: 'chat.completion', - created: Date.now(), - model: requestBody.model, - choices: [ - { - index: 0, - message: output, - logprobs: null, - finish_reason: 'stop', - }, - ], - usage: { - inferenceTime: responseHeaders.get('inference-time'), - modelSize: responseHeaders.get('inference-meter'), - }, - }; - }, + chatComplete, }, };