From 82f579459c951bb44f8f2be1480fc09c06b6db6d Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Tue, 24 Jun 2025 20:28:31 -0400
Subject: [PATCH 01/15] Get bytez integration working.

---
 src/globals.ts                      |   2 +
 src/handlers/handlerUtils.ts        |   2 +-
 src/providers/bytez/api.ts          |  18 ++++
 src/providers/bytez/chatComplete.ts |  59 +++++++++++++
 src/providers/bytez/index.ts        | 126 ++++++++++++++++++++++++++++
 src/providers/index.ts              |   2 +
 src/public/index.html               |   2 +
 src/start-server.ts                 |   2 +-
 src/utils.ts                        |   7 +-
 9 files changed, 217 insertions(+), 3 deletions(-)
 create mode 100644 src/providers/bytez/api.ts
 create mode 100644 src/providers/bytez/chatComplete.ts
 create mode 100644 src/providers/bytez/index.ts

diff --git a/src/globals.ts b/src/globals.ts
index 83b4404da..6e76a99e3 100644
--- a/src/globals.ts
+++ b/src/globals.ts
@@ -95,6 +95,7 @@ export const LEPTON: string = 'lepton';
 export const KLUSTER_AI: string = 'kluster-ai';
 export const NSCALE: string = 'nscale';
 export const HYPERBOLIC: string = 'hyperbolic';
+export const BYTEZ: string = 'bytez';
 
 export const VALID_PROVIDERS = [
   ANTHROPIC,
@@ -155,6 +156,7 @@ export const VALID_PROVIDERS = [
   KLUSTER_AI,
   NSCALE,
   HYPERBOLIC,
+  BYTEZ,
 ];
 
 export const CONTENT_TYPES = {
diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts
index 7388d3e15..1ce731162 100644
--- a/src/handlers/handlerUtils.ts
+++ b/src/handlers/handlerUtils.ts
@@ -510,7 +510,7 @@ export async function tryPost(
           body: transformedRequestBody,
           headers: fetchOptions.headers,
         },
-        requestParams: transformedRequestBody,
+        requestParams: { ...params, ...transformedRequestBody },
         finalUntransformedRequest: {
           body: params,
         },
diff --git a/src/providers/bytez/api.ts b/src/providers/bytez/api.ts
new file mode 100644
index 000000000..e95fb36ae
--- /dev/null
+++ b/src/providers/bytez/api.ts
@@ -0,0 +1,18 @@
+import { ProviderAPIConfig } from '../types';
+
+const BytezInferenceAPI: ProviderAPIConfig = {
+  getBaseURL: () => 'https://api.bytez.com',
+  headers: async ({ providerOptions }) => {
+    const { apiKey } = providerOptions;
+
+    const headers: Record<string, string> = {};
+
+    headers['Authorization'] = `Key ${apiKey}`;
+
+    return headers;
+  },
+  getEndpoint: ({ gatewayRequestBodyJSON: { version = 2, model } }) =>
+    `/models/v${version}/${model}`,
+};
+
+export default BytezInferenceAPI;
diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts
new file mode 100644
index 000000000..f08331324
--- /dev/null
+++ b/src/providers/bytez/chatComplete.ts
@@ -0,0 +1,59 @@
+import { ProviderConfig } from '../types';
+
+export const BytezInferenceChatCompleteConfig: ProviderConfig = {
+  messages: {
+    param: 'messages',
+    default: '',
+  },
+  max_tokens: {
+    param: 'max_tokens',
+    default: 100,
+    min: 0,
+  },
+  max_completion_tokens: {
+    param: 'max_tokens',
+    default: 100,
+    min: 0,
+  },
+  temperature: {
+    param: 'temperature',
+    default: 1,
+    min: 0,
+    max: 2,
+  },
+  top_p: {
+    param: 'top_p',
+    default: 1,
+    min: 0,
+    max: 1,
+  },
+  stream: {
+    param: 'stream',
+    default: false,
+  },
+  stop: {
+    param: 'stop',
+  },
+  presence_penalty: {
+    param: 'presence_penalty',
+    min: -2,
+    max: 2,
+  },
+  frequency_penalty: {
+    param: 'frequency_penalty',
+    min: -2,
+    max: 2,
+  },
+  user: {
+    param: 'user',
+  },
+  tools: {
+    param: 'tools',
+  },
+  tool_choice: {
+    param: 'tool_choice',
+  },
+  response_format: {
+    param: 'response_format',
+  },
+};
diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts
new file mode 100644
index 000000000..f629b352a
--- /dev/null
+++ b/src/providers/bytez/index.ts
@@ -0,0 +1,126 @@
+import crypto from 'node:crypto';
+import { ProviderConfigs } from '../types';
+import BytezInferenceAPI from './api';
+import { BytezInferenceChatCompleteConfig } from './chatComplete';
+
+const BASE_URL = 'https://api.bytez.com/models/v2';
+
+const BytezInferenceAPIConfig: ProviderConfigs = {
+  api: BytezInferenceAPI,
+  chatComplete: BytezInferenceChatCompleteConfig,
+  requestHandlers: {
+    chatComplete: async ({ providerOptions, requestBody }) => {
+      const skipProps: Record<string, boolean> = {
+        model: true,
+      };
+
+      const reservedProps: Record<string, boolean> = {
+        stream: true,
+        messages: true,
+        text: true,
+      };
+
+      const adaptedBody: Record<string, any> = {};
+      const params: Record<string, any> = {};
+
+      for (const [key, value] of Object.entries(requestBody)) {
+        if (skipProps[key]) {
+          continue;
+        }
+
+        if (reservedProps[key]) {
+          adaptedBody[key] = value;
+          continue;
+        }
+
+        params[key] = value;
+      }
+
+      adaptedBody.params = paramsAdapter(params);
+
+      const url = `${BASE_URL}/${requestBody.model}`;
+
+      const response = await fetch(url, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Authorization: `Key ${providerOptions.apiKey}`,
+        },
+        body: JSON.stringify(adaptedBody),
+      });
+
+      if (adaptedBody.stream) {
+        return new Response(response.body, response);
+      }
+
+      const { error, output }: { error: string | null; output: object | null } =
+        await response.json();
+
+      if (error) {
+        return new Response(
+          JSON.stringify({
+            //
+            message: error,
+          }),
+          response
+        );
+      }
+
+      return new Response(
+        JSON.stringify({
+          id: crypto.randomUUID(),
+          object: 'chat.completion',
+          created: Date.now(),
+          model: requestBody.model,
+          choices: [
+            {
+              index: 0,
+              message: output,
+              logprobs: null,
+              finish_reason: 'stop',
+            },
+          ],
+          usage: {
+            inferenceTime: response.headers.get('inference-time'),
+            modelSize: response.headers.get('inference-meter'),
+            // prompt_tokens: 11,
+            // completion_tokens: 28,
+            // total_tokens: 39,
+            // prompt_tokens_details: {
+            //   cached_tokens: 0,
+            //   audio_tokens: 0,
+            // },
+            // completion_tokens_details: {
+            //   reasoning_tokens: 0,
+            //   audio_tokens: 0,
+            //   accepted_prediction_tokens: 0,
+            //   rejected_prediction_tokens: 0,
+            // },
+          },
+          // service_tier: 'default',
+          // system_fingerprint: 'fp_34a54ae93c',
+        }),
+        response
+      );
+    },
+  },
+};
+
+function paramsAdapter(params: Record<string, any>) {
+  const aliasMap: Record<string, any> = {
+    max_tokens: 'max_new_tokens',
+  };
+
+  for (const key of Object.keys(params)) {
+    const alias = aliasMap[key];
+
+    if (alias) {
+      params[alias] = params[key];
+      delete params[key];
+    }
+  }
+
+  return params;
+}
+
+export default BytezInferenceAPIConfig;
diff --git a/src/providers/index.ts b/src/providers/index.ts
index 11ef1738f..e8e593e8c 100644
--- a/src/providers/index.ts
+++ b/src/providers/index.ts
@@ -1,3 +1,4 @@
+import BytezConfig from './bytez';
 import AI21Config from './ai21';
 import AnthropicConfig from './anthropic';
 import AnyscaleConfig from './anyscale';
@@ -118,6 +119,7 @@ const Providers: { [key: string]: ProviderConfigs } = {
   'kluster-ai': KlusterAIConfig,
   nscale: NscaleConfig,
   hyperbolic: HyperbolicConfig,
+  bytez: BytezConfig,
 };
 
 export default Providers;
diff --git a/src/public/index.html b/src/public/index.html
index d98127c37..29901baa6 100644
--- a/src/public/index.html
+++ b/src/public/index.html
@@ -1106,6 +1106,7 @@ <h3>Select Provider</h3>
         <div class="select-wrapper">
           <select id="providerSelect" class="select">
             <option value="">Select a provider</option>
+            <option value="bytez">Bytez</option>
             <option value="openai">OpenAI</option>
             <option value="anthropic">Anthropic</option>
             <option value="groq">Groq</option>
@@ -1456,6 +1457,7 @@ <h3>Enter API Key</h3>
       };
 
       const modelMap = {
+        "bytez": "google/gemma-3-1b-it",
         "openai": "gpt-4o-mini",
         "anthropic": "claude-3-5-sonnet-20240620",
         "groq": "llama3-70b-8192",
diff --git a/src/start-server.ts b/src/start-server.ts
index f58da4231..b12bd2bc7 100644
--- a/src/start-server.ts
+++ b/src/start-server.ts
@@ -33,7 +33,7 @@ if (
     const scriptDir = dirname(fileURLToPath(import.meta.url));
 
     // Serve the index.html content directly for both routes
-    const indexPath = join(scriptDir, 'public/index.html');
+    const indexPath = join(`${scriptDir}/../`, 'public/index.html');
     const indexContent = readFileSync(indexPath, 'utf-8');
 
     const serveIndex = (c: Context) => {
diff --git a/src/utils.ts b/src/utils.ts
index ba42f7545..d32896f62 100644
--- a/src/utils.ts
+++ b/src/utils.ts
@@ -7,6 +7,7 @@ import {
   DEEPINFRA,
   SAMBANOVA,
   BEDROCK,
+  BYTEZ,
 } from './globals';
 import { Params } from './types/requestBody';
 
@@ -48,9 +49,13 @@ export const getStreamModeSplitPattern = (
     splitPattern = '\n';
   }
 
+  if (proxyProvider === BYTEZ) {
+    splitPattern = ' ';
+  }
+
   return splitPattern;
 };
-export type SplitPatternType = '\n\n' | '\r\n\r\n' | '\n' | '\r\n';
+export type SplitPatternType = '\n\n' | '\r\n\r\n' | '\n' | '\r\n' | ' ';
 
 export const getStreamingMode = (
   reqBody: Params,

From 2174215ca9066bd0c569a9bb17abc1cbbe103565 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Wed, 25 Jun 2025 15:03:27 -0400
Subject: [PATCH 02/15] Restrict Bytez integration to only allow compatibility
 with chat models.

---
 src/providers/bytez/chatComplete.ts |  39 +-----
 src/providers/bytez/index.ts        | 182 ++++++++++++++++++++++------
 2 files changed, 151 insertions(+), 70 deletions(-)

diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts
index f08331324..898d72d63 100644
--- a/src/providers/bytez/chatComplete.ts
+++ b/src/providers/bytez/chatComplete.ts
@@ -1,17 +1,13 @@
 import { ProviderConfig } from '../types';
 
-export const BytezInferenceChatCompleteConfig: ProviderConfig = {
+const BytezInferenceChatCompleteConfig: ProviderConfig = {
   messages: {
     param: 'messages',
-    default: '',
+    required: true,
   },
   max_tokens: {
-    param: 'max_tokens',
-    default: 100,
-    min: 0,
-  },
-  max_completion_tokens: {
-    param: 'max_tokens',
+    // NOTE param acts as an alias, it will be added to "params" oon the req body
+    param: 'max_new_tokens',
     default: 100,
     min: 0,
   },
@@ -31,29 +27,6 @@ export const BytezInferenceChatCompleteConfig: ProviderConfig = {
     param: 'stream',
     default: false,
   },
-  stop: {
-    param: 'stop',
-  },
-  presence_penalty: {
-    param: 'presence_penalty',
-    min: -2,
-    max: 2,
-  },
-  frequency_penalty: {
-    param: 'frequency_penalty',
-    min: -2,
-    max: 2,
-  },
-  user: {
-    param: 'user',
-  },
-  tools: {
-    param: 'tools',
-  },
-  tool_choice: {
-    param: 'tool_choice',
-  },
-  response_format: {
-    param: 'response_format',
-  },
 };
+
+export { BytezInferenceChatCompleteConfig };
diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts
index f629b352a..43be7ea35 100644
--- a/src/providers/bytez/index.ts
+++ b/src/providers/bytez/index.ts
@@ -1,51 +1,65 @@
 import crypto from 'node:crypto';
-import { ProviderConfigs } from '../types';
+import { ParameterConfig, ProviderConfigs } from '../types';
 import BytezInferenceAPI from './api';
 import { BytezInferenceChatCompleteConfig } from './chatComplete';
 
 const BASE_URL = 'https://api.bytez.com/models/v2';
 
+const IS_CHAT_MODEL_CACHE: Record<string, boolean> = {};
+
 const BytezInferenceAPIConfig: ProviderConfigs = {
   api: BytezInferenceAPI,
   chatComplete: BytezInferenceChatCompleteConfig,
   requestHandlers: {
     chatComplete: async ({ providerOptions, requestBody }) => {
-      const skipProps: Record<string, boolean> = {
-        model: true,
-      };
+      const { model: modelId } = requestBody;
 
-      const reservedProps: Record<string, boolean> = {
-        stream: true,
-        messages: true,
-        text: true,
-      };
+      let adaptedBody;
 
-      const adaptedBody: Record<string, any> = {};
-      const params: Record<string, any> = {};
+      try {
+        adaptedBody = bodyAdapter(requestBody);
+      } catch (error: any) {
+        return new Response(
+          JSON.stringify({
+            status: 'failure',
+            message: error.message,
+          }),
+          {
+            status: 500,
+            headers: {
+              'content-type': 'application/json',
+            },
+          }
+        );
+      }
 
-      for (const [key, value] of Object.entries(requestBody)) {
-        if (skipProps[key]) {
-          continue;
-        }
+      const headers = {
+        'Content-Type': 'application/json',
+        Authorization: `Key ${providerOptions.apiKey}`,
+      };
 
-        if (reservedProps[key]) {
-          adaptedBody[key] = value;
-          continue;
-        }
+      const isChatModel = await validateModelIsChat(modelId, headers);
 
-        params[key] = value;
+      if (!isChatModel) {
+        return new Response(
+          JSON.stringify({
+            status: 'failure',
+            message: 'Bytez only supports chat models on PortKey',
+          }),
+          {
+            status: 500,
+            headers: {
+              'content-type': 'application/json',
+            },
+          }
+        );
       }
 
-      adaptedBody.params = paramsAdapter(params);
-
-      const url = `${BASE_URL}/${requestBody.model}`;
+      const url = `${BASE_URL}/${modelId}`;
 
       const response = await fetch(url, {
         method: 'POST',
-        headers: {
-          'Content-Type': 'application/json',
-          Authorization: `Key ${providerOptions.apiKey}`,
-        },
+        headers,
         body: JSON.stringify(adaptedBody),
       });
 
@@ -71,7 +85,7 @@ const BytezInferenceAPIConfig: ProviderConfigs = {
           id: crypto.randomUUID(),
           object: 'chat.completion',
           created: Date.now(),
-          model: requestBody.model,
+          model: modelId,
           choices: [
             {
               index: 0,
@@ -106,21 +120,115 @@ const BytezInferenceAPIConfig: ProviderConfigs = {
   },
 };
 
-function paramsAdapter(params: Record<string, any>) {
-  const aliasMap: Record<string, any> = {
-    max_tokens: 'max_new_tokens',
+function bodyAdapter(requestBody: Record<string, any>) {
+  for (const [param, paramConfig] of Object.entries(
+    BytezInferenceChatCompleteConfig
+  )) {
+    const hasParam = Boolean(requestBody[param]);
+
+    // first assign defaults
+    if (!hasParam) {
+      const { default: defaultValue, required } =
+        paramConfig as ParameterConfig;
+
+      // if it's required, throw
+      if (required) {
+        throw new Error(`Param ${param} is required`);
+      }
+
+      // assign the default value
+      if (defaultValue !== undefined && requestBody[param] === undefined) {
+        requestBody[param] = defaultValue;
+      }
+    }
+  }
+
+  // now we remap everything that has an alias, i.e. "prop" on propConfig
+  for (const [key, value] of Object.entries(requestBody)) {
+    const paramObj = BytezInferenceChatCompleteConfig[key] as
+      | ParameterConfig
+      | undefined;
+
+    if (paramObj) {
+      const { param } = paramObj;
+
+      if (key !== param) {
+        requestBody[param] = requestBody[key];
+        delete requestBody[key];
+      }
+    }
+  }
+
+  // now we adapt to the bytez input signature
+  // props to skip
+  const skipProps: Record<string, boolean> = {
+    model: true,
   };
 
-  for (const key of Object.keys(params)) {
-    const alias = aliasMap[key];
+  // props that cannot be removed from the body
+  const reservedProps: Record<string, boolean> = {
+    stream: true,
+    messages: true,
+  };
+  const adaptedBody: Record<string, any> = { params: {} };
+
+  for (const [key, value] of Object.entries(requestBody)) {
+    // things like "model"
+    if (skipProps[key]) {
+      continue;
+    }
 
-    if (alias) {
-      params[alias] = params[key];
-      delete params[key];
+    // things like "messages", "stream"
+    if (reservedProps[key]) {
+      adaptedBody[key] = value;
+      continue;
     }
+    // anything else, e.g. max_new_tokens
+    adaptedBody.params[key] = value;
+  }
+
+  return adaptedBody;
+}
+
+interface Model {
+  task: string;
+}
+
+interface BytezResponse {
+  error: string;
+  output: Model[];
+  // add other model properties as needed
+}
+
+async function validateModelIsChat(
+  modelId: string,
+  headers: Record<string, any>
+) {
+  // return from cache if already validated
+  if (IS_CHAT_MODEL_CACHE[modelId]) {
+    return IS_CHAT_MODEL_CACHE[modelId];
+  }
+
+  const url = `${BASE_URL}/list/models?modelId=${modelId}`;
+
+  const response = await fetch(url, {
+    headers,
+  });
+
+  const {
+    error,
+    output: [model],
+  }: BytezResponse = await response.json();
+
+  if (error) {
+    throw new Error(error);
   }
 
-  return params;
+  const isChatModel = model.task === 'chat';
+
+  IS_CHAT_MODEL_CACHE[modelId] = isChatModel;
+
+  return isChatModel;
 }
 
 export default BytezInferenceAPIConfig;

From 33c9168f5730b744076056404bb5a7ec27784371 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Wed, 25 Jun 2025 15:07:28 -0400
Subject: [PATCH 03/15] Cleanup comment in chatComplete.ts

---
 src/providers/bytez/chatComplete.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts
index 898d72d63..b7dd470cd 100644
--- a/src/providers/bytez/chatComplete.ts
+++ b/src/providers/bytez/chatComplete.ts
@@ -6,7 +6,7 @@ const BytezInferenceChatCompleteConfig: ProviderConfig = {
     required: true,
   },
   max_tokens: {
-    // NOTE param acts as an alias, it will be added to "params" oon the req body
+    // NOTE param acts as an alias, it will be added to "params" on the req body
     param: 'max_new_tokens',
     default: 100,
     min: 0,

From 196299d057dd53bbe1e1105ae47a8d5f24c505e0 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Wed, 25 Jun 2025 15:11:01 -0400
Subject: [PATCH 04/15] Reorder bytez in public/index.html

---
 src/public/index.html | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/public/index.html b/src/public/index.html
index 29901baa6..7f260c88b 100644
--- a/src/public/index.html
+++ b/src/public/index.html
@@ -1106,7 +1106,6 @@ <h3>Select Provider</h3>
         <div class="select-wrapper">
           <select id="providerSelect" class="select">
             <option value="">Select a provider</option>
-            <option value="bytez">Bytez</option>
             <option value="openai">OpenAI</option>
             <option value="anthropic">Anthropic</option>
             <option value="groq">Groq</option>
@@ -1116,6 +1115,7 @@ <h3>Select Provider</h3>
             <option value="together-ai">Together AI</option>
             <option value="perplexity-ai">Perplexity AI</option>
             <option value="mistral-ai">Mistral AI</option>
+            <option value="bytez">Bytez</option>
             <option value="others">Others</option>
           </select>
         </div>
@@ -1457,7 +1457,6 @@ <h3>Enter API Key</h3>
       };
 
       const modelMap = {
-        "bytez": "google/gemma-3-1b-it",
         "openai": "gpt-4o-mini",
         "anthropic": "claude-3-5-sonnet-20240620",
         "groq": "llama3-70b-8192",
@@ -1467,6 +1466,7 @@ <h3>Enter API Key</h3>
         "together-ai": "llama-3.1-8b-instruct",
         "perplexity-ai": "pplx-7b-online",
         "mistral-ai": "mistral-small-latest",
+        "bytez": "google/gemma-3-1b-it",
         "others": "gpt-4o-mini"
       }
 

From f9d845602647ffb9c8aa6f31c00157886a3e5042 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Wed, 25 Jun 2025 15:15:09 -0400
Subject: [PATCH 05/15] Add comments to handler utils change for spreading
 props into requestParams in the context's requestOptions.

---
 src/handlers/handlerUtils.ts | 7 ++++++-
 src/start-server.ts          | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts
index 1ce731162..a1abd1292 100644
--- a/src/handlers/handlerUtils.ts
+++ b/src/handlers/handlerUtils.ts
@@ -510,7 +510,12 @@ export async function tryPost(
           body: transformedRequestBody,
           headers: fetchOptions.headers,
         },
-        requestParams: { ...params, ...transformedRequestBody },
+        requestParams: {
+          // in the event transformedRequestBody request is empty
+          ...params,
+          // if this is populated, we will overwrite whatever was initially in params
+          ...transformedRequestBody,
+        },
         finalUntransformedRequest: {
           body: params,
         },
diff --git a/src/start-server.ts b/src/start-server.ts
index b12bd2bc7..f58da4231 100644
--- a/src/start-server.ts
+++ b/src/start-server.ts
@@ -33,7 +33,7 @@ if (
     const scriptDir = dirname(fileURLToPath(import.meta.url));
 
     // Serve the index.html content directly for both routes
-    const indexPath = join(`${scriptDir}/../`, 'public/index.html');
+    const indexPath = join(scriptDir, 'public/index.html');
     const indexContent = readFileSync(indexPath, 'utf-8');
 
     const serveIndex = (c: Context) => {

From 25277c4fe2c44da95364c4f0a643cc56fa51a02a Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Wed, 25 Jun 2025 15:16:26 -0400
Subject: [PATCH 06/15] Expand on comments in handlerUtils.ts

---
 src/handlers/handlerUtils.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts
index a1abd1292..9584a8a7b 100644
--- a/src/handlers/handlerUtils.ts
+++ b/src/handlers/handlerUtils.ts
@@ -511,7 +511,7 @@ export async function tryPost(
           headers: fetchOptions.headers,
         },
         requestParams: {
-          // in the event transformedRequestBody request is empty
+          // in the event transformedRequestBody request is empty, e.g. you have opted to handle requests via a custom requestHandler
           ...params,
           // if this is populated, we will overwrite whatever was initially in params
           ...transformedRequestBody,

From d41c9b27126dec45c4fdd24d677d4192766bdf97 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Wed, 25 Jun 2025 15:18:36 -0400
Subject: [PATCH 07/15] Expand on comments again.

---
 src/providers/bytez/chatComplete.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts
index b7dd470cd..e7865a66c 100644
--- a/src/providers/bytez/chatComplete.ts
+++ b/src/providers/bytez/chatComplete.ts
@@ -7,6 +7,7 @@ const BytezInferenceChatCompleteConfig: ProviderConfig = {
   },
   max_tokens: {
     // NOTE param acts as an alias, it will be added to "params" on the req body
+    // we do this adaptation ourselves in our custom requestHandler. See src/providers/bytez/index.ts
     param: 'max_new_tokens',
     default: 100,
     min: 0,

From 35456b55ad9c7dad456f18803c2f03e2a288f0c3 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Thu, 26 Jun 2025 14:41:29 -0400
Subject: [PATCH 08/15] Add the LRU cache.

---
 src/providers/bytez/index.ts | 62 +++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts
index 43be7ea35..ed09715b9 100644
--- a/src/providers/bytez/index.ts
+++ b/src/providers/bytez/index.ts
@@ -5,7 +5,61 @@ import { BytezInferenceChatCompleteConfig } from './chatComplete';
 
 const BASE_URL = 'https://api.bytez.com/models/v2';
 
-const IS_CHAT_MODEL_CACHE: Record<string, boolean> = {};
+class LRUCache<K, V> {
+  private size: number;
+  private map: Map<K, V>;
+
+  constructor({ size = 100 } = {}) {
+    this.size = size;
+    this.map = new Map();
+  }
+
+  get(key: K): V | undefined {
+    if (!this.map.has(key)) return undefined;
+
+    // Move the key to the end to mark it as recently used
+    const value = this.map.get(key)!;
+    this.map.delete(key);
+    this.map.set(key, value);
+    return value;
+  }
+
+  set(key: K, value: V): void {
+    if (this.map.has(key)) {
+      // Remove the old value to update position
+      this.map.delete(key);
+    } else if (this.map.size >= this.size) {
+      // Remove least recently used (first item in Map)
+      const lruKey: any = this.map.keys().next().value;
+      this.map.delete(lruKey);
+    }
+
+    // Insert the new key-value as most recently used
+    this.map.set(key, value);
+  }
+
+  has(key: K): boolean {
+    return this.map.has(key);
+  }
+
+  delete(key: K): boolean {
+    return this.map.delete(key);
+  }
+
+  keys(): IterableIterator<K> {
+    return this.map.keys();
+  }
+
+  values(): IterableIterator<V> {
+    return this.map.values();
+  }
+
+  get length(): number {
+    return this.map.size;
+  }
+}
+
+const IS_CHAT_MODEL_CACHE = new LRUCache({ size: 100 });
 
 const BytezInferenceAPIConfig: ProviderConfigs = {
   api: BytezInferenceAPI,
@@ -205,8 +259,8 @@ async function validateModelIsChat(
   headers: Record<string, any>
 ) {
   // return from cache if already validated
-  if (IS_CHAT_MODEL_CACHE[modelId]) {
-    return IS_CHAT_MODEL_CACHE[modelId];
+  if (IS_CHAT_MODEL_CACHE.has(modelId)) {
+    return IS_CHAT_MODEL_CACHE.get(modelId);
   }
 
   const url = `${BASE_URL}/list/models?modelId=${modelId}`;
@@ -226,7 +280,7 @@ async function validateModelIsChat(
 
   const isChatModel = model.task === 'chat';
 
-  IS_CHAT_MODEL_CACHE[modelId] = isChatModel;
+  IS_CHAT_MODEL_CACHE.set(modelId, isChatModel);
 
   return isChatModel;
 }

From ed214e81cc52313f1c7eeba806b3cc849d88fb35 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Thu, 26 Jun 2025 15:09:36 -0400
Subject: [PATCH 09/15] Tidy up PR.

---
 src/providers/bytez/index.ts | 216 +++++++++++------------------------
 src/providers/bytez/types.ts |  11 ++
 src/providers/bytez/utils.ts |  55 +++++++++
 3 files changed, 130 insertions(+), 152 deletions(-)
 create mode 100644 src/providers/bytez/types.ts
 create mode 100644 src/providers/bytez/utils.ts

diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts
index ed09715b9..d87e0f53a 100644
--- a/src/providers/bytez/index.ts
+++ b/src/providers/bytez/index.ts
@@ -2,63 +2,11 @@ import crypto from 'node:crypto';
 import { ParameterConfig, ProviderConfigs } from '../types';
 import BytezInferenceAPI from './api';
 import { BytezInferenceChatCompleteConfig } from './chatComplete';
+import { LRUCache } from './utils';
+import { BytezResponse } from './types';
 
 const BASE_URL = 'https://api.bytez.com/models/v2';
 
-class LRUCache<K, V> {
-  private size: number;
-  private map: Map<K, V>;
-
-  constructor({ size = 100 } = {}) {
-    this.size = size;
-    this.map = new Map();
-  }
-
-  get(key: K): V | undefined {
-    if (!this.map.has(key)) return undefined;
-
-    // Move the key to the end to mark it as recently used
-    const value = this.map.get(key)!;
-    this.map.delete(key);
-    this.map.set(key, value);
-    return value;
-  }
-
-  set(key: K, value: V): void {
-    if (this.map.has(key)) {
-      // Remove the old value to update position
-      this.map.delete(key);
-    } else if (this.map.size >= this.size) {
-      // Remove least recently used (first item in Map)
-      const lruKey: any = this.map.keys().next().value;
-      this.map.delete(lruKey);
-    }
-
-    // Insert the new key-value as most recently used
-    this.map.set(key, value);
-  }
-
-  has(key: K): boolean {
-    return this.map.has(key);
-  }
-
-  delete(key: K): boolean {
-    return this.map.delete(key);
-  }
-
-  keys(): IterableIterator<K> {
-    return this.map.keys();
-  }
-
-  values(): IterableIterator<V> {
-    return this.map.values();
-  }
-
-  get length(): number {
-    return this.map.size;
-  }
-}
-
 const IS_CHAT_MODEL_CACHE = new LRUCache({ size: 100 });
 
 const BytezInferenceAPIConfig: ProviderConfigs = {
@@ -66,114 +14,88 @@ const BytezInferenceAPIConfig: ProviderConfigs = {
   chatComplete: BytezInferenceChatCompleteConfig,
   requestHandlers: {
     chatComplete: async ({ providerOptions, requestBody }) => {
-      const { model: modelId } = requestBody;
+      try {
+        const { model: modelId } = requestBody;
 
-      let adaptedBody;
+        const adaptedBody = bodyAdapter(requestBody);
 
-      try {
-        adaptedBody = bodyAdapter(requestBody);
-      } catch (error: any) {
-        return new Response(
-          JSON.stringify({
-            status: 'failure',
-            message: error.message,
-          }),
-          {
-            status: 500,
-            headers: {
-              'content-type': 'application/json',
-            },
-          }
-        );
-      }
+        const headers = {
+          'Content-Type': 'application/json',
+          Authorization: `Key ${providerOptions.apiKey}`,
+        };
 
-      const headers = {
-        'Content-Type': 'application/json',
-        Authorization: `Key ${providerOptions.apiKey}`,
-      };
+        const isChatModel = await validateModelIsChat(modelId, headers);
 
-      const isChatModel = await validateModelIsChat(modelId, headers);
+        if (!isChatModel) {
+          return constructFailureResponse(
+            'Bytez only supports chat models on PortKey',
+            { status: 400 }
+          );
+        }
 
-      if (!isChatModel) {
-        return new Response(
-          JSON.stringify({
-            status: 'failure',
-            message: 'Bytez only supports chat models on PortKey',
-          }),
-          {
-            status: 500,
-            headers: {
-              'content-type': 'application/json',
-            },
-          }
-        );
-      }
+        const url = `${BASE_URL}/${modelId}`;
 
-      const url = `${BASE_URL}/${modelId}`;
+        const response = await fetch(url, {
+          method: 'POST',
+          headers,
+          body: JSON.stringify(adaptedBody),
+        });
 
-      const response = await fetch(url, {
-        method: 'POST',
-        headers,
-        body: JSON.stringify(adaptedBody),
-      });
+        if (adaptedBody.stream) {
+          return new Response(response.body, response);
+        }
 
-      if (adaptedBody.stream) {
-        return new Response(response.body, response);
-      }
+        const { error, output }: BytezResponse = await response.json();
 
-      const { error, output }: { error: string | null; output: object | null } =
-        await response.json();
+        if (error) {
+          return constructFailureResponse(error, response);
+        }
 
-      if (error) {
         return new Response(
           JSON.stringify({
-            //
-            message: error,
+            id: crypto.randomUUID(),
+            object: 'chat.completion',
+            created: Date.now(),
+            model: modelId,
+            choices: [
+              {
+                index: 0,
+                message: output,
+                logprobs: null,
+                finish_reason: 'stop',
+              },
+            ],
+            usage: {
+              inferenceTime: response.headers.get('inference-time'),
+              modelSize: response.headers.get('inference-meter'),
+            },
           }),
           response
         );
+      } catch (error: any) {
+        return constructFailureResponse(error.message);
       }
-
-      return new Response(
-        JSON.stringify({
-          id: crypto.randomUUID(),
-          object: 'chat.completion',
-          created: Date.now(),
-          model: modelId,
-          choices: [
-            {
-              index: 0,
-              message: output,
-              logprobs: null,
-              finish_reason: 'stop',
-            },
-          ],
-          usage: {
-            inferenceTime: response.headers.get('inference-time'),
-            modelSize: response.headers.get('inference-meter'),
-            // prompt_tokens: 11,
-            // completion_tokens: 28,
-            // total_tokens: 39,
-            // prompt_tokens_details: {
-            //   cached_tokens: 0,
-            //   audio_tokens: 0,
-            // },
-            // completion_tokens_details: {
-            //   reasoning_tokens: 0,
-            //   audio_tokens: 0,
-            //   accepted_prediction_tokens: 0,
-            //   rejected_prediction_tokens: 0,
-            // },
-          },
-          // service_tier: 'default',
-          // system_fingerprint: 'fp_34a54ae93c',
-        }),
-        response
-      );
     },
   },
 };
 
+function constructFailureResponse(message: string, response?: object) {
+  return new Response(
+    JSON.stringify({
+      status: 'failure',
+      message,
+    }),
+    {
+      status: 500,
+      headers: {
+        'content-type': 'application/json',
+      },
+      // override defaults if desired
+      ...response,
+    }
+  );
+}
+
 function bodyAdapter(requestBody: Record<string, any>) {
   for (const [param, paramConfig] of Object.entries(
     BytezInferenceChatCompleteConfig
@@ -244,16 +166,6 @@ function bodyAdapter(requestBody: Record<string, any>) {
   return adaptedBody;
 }
 
-interface Model {
-  task: string;
-}
-
-interface BytezResponse {
-  error: string;
-  output: Model[];
-  // add other model properties as needed
-}
-
 async function validateModelIsChat(
   modelId: string,
   headers: Record<string, any>
diff --git a/src/providers/bytez/types.ts b/src/providers/bytez/types.ts
new file mode 100644
index 000000000..58a586abe
--- /dev/null
+++ b/src/providers/bytez/types.ts
@@ -0,0 +1,11 @@
+interface Model {
+  task: string;
+}
+
+interface BytezResponse {
+  error: string;
+  output: Model[];
+  // add other model properties as needed
+}
+
+export { Model, BytezResponse };
diff --git a/src/providers/bytez/utils.ts b/src/providers/bytez/utils.ts
new file mode 100644
index 000000000..27af31b5b
--- /dev/null
+++ b/src/providers/bytez/utils.ts
@@ -0,0 +1,55 @@
+class LRUCache<K, V> {
+  private size: number;
+  private map: Map<K, V>;
+
+  constructor({ size = 100 } = {}) {
+    this.size = size;
+    this.map = new Map();
+  }
+
+  get(key: K): V | undefined {
+    if (!this.map.has(key)) return undefined;
+
+    // Move the key to the end to mark it as recently used
+    const value = this.map.get(key)!;
+    this.map.delete(key);
+    this.map.set(key, value);
+    return value;
+  }
+
+  set(key: K, value: V): void {
+    if (this.map.has(key)) {
+      // Remove the old value to update position
+      this.map.delete(key);
+    } else if (this.map.size >= this.size) {
+      // Remove least recently used (first item in Map)
+      const lruKey: any = this.map.keys().next().value;
+      this.map.delete(lruKey);
+    }
+
+    // Insert the new key-value as most recently used
+    this.map.set(key, value);
+  }
+
+  has(key: K): boolean {
+    return this.map.has(key);
+  }
+
+  delete(key: K): boolean {
+    return this.map.delete(key);
+  }
+
+  keys(): IterableIterator<K> {
+    return this.map.keys();
+  }
+
+  values(): IterableIterator<V> {
+    return this.map.values();
+  }
+
+  get length(): number {
+    return this.map.size;
+  }
+}
+
+export { LRUCache };

From c60b3673cca2bb39c2a1d1bd4ca39df7012d2143 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Thu, 26 Jun 2025 15:21:14 -0400
Subject: [PATCH 10/15] Final bit of tidying the bytez impl.

---
 src/providers/bytez/index.ts | 108 ++++++-----------------------------
 src/providers/bytez/utils.ts |  76 +++++++++++++++++++++++-
 2 files changed, 94 insertions(+), 90 deletions(-)

diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts
index d87e0f53a..982823c45 100644
--- a/src/providers/bytez/index.ts
+++ b/src/providers/bytez/index.ts
@@ -1,8 +1,8 @@
 import crypto from 'node:crypto';
-import { ParameterConfig, ProviderConfigs } from '../types';
+import { ProviderConfigs } from '../types';
 import BytezInferenceAPI from './api';
 import { BytezInferenceChatCompleteConfig } from './chatComplete';
-import { LRUCache } from './utils';
+import { bodyAdapter, LRUCache } from './utils';
 import { BytezResponse } from './types';
 
 const BASE_URL = 'https://api.bytez.com/models/v2';
@@ -79,93 +79,6 @@ const BytezInferenceAPIConfig: ProviderConfigs = {
   },
 };
 
-function constructFailureResponse(message: string, response?: object) {
-  return new Response(
-    JSON.stringify({
-      status: 'failure',
-      message,
-    }),
-    {
-      status: 500,
-      headers: {
-        'content-type': 'application/json',
-      },
-      // override defaults if desired
-      ...response,
-    }
-  );
-}
-
-function bodyAdapter(requestBody: Record<string, any>) {
-  for (const [param, paramConfig] of Object.entries(
-    BytezInferenceChatCompleteConfig
-  )) {
-    const hasParam = Boolean(requestBody[param]);
-
-    // first assign defaults
-    if (!hasParam) {
-      const { default: defaultValue, required } =
-        paramConfig as ParameterConfig;
-
-      // if it's required, throw
-      if (required) {
-        throw new Error(`Param ${param} is required`);
-      }
-
-      // assign the default value
-      if (defaultValue !== undefined && requestBody[param] === undefined) {
-        requestBody[param] = defaultValue;
-      }
-    }
-  }
-
-  // now we remap everything that has an alias, i.e. "prop" on propConfig
-  for (const [key, value] of Object.entries(requestBody)) {
-    const paramObj = BytezInferenceChatCompleteConfig[key] as
-      | ParameterConfig
-      | undefined;
-
-    if (paramObj) {
-      const { param } = paramObj;
-
-      if (key !== param) {
-        requestBody[param] = requestBody[key];
-        delete requestBody[key];
-      }
-    }
-  }
-
-  // now we adapt to the bytez input signature
-  // props to skip
-  const skipProps: Record<string, boolean> = {
-    model: true,
-  };
-
-  // props that cannot be removed from the body
-  const reservedProps: Record<string, boolean> = {
-    stream: true,
-    messages: true,
-  };
-  const adaptedBody: Record<string, any> = { params: {} };
-
-  for (const [key, value] of Object.entries(requestBody)) {
-    // things like "model"
-    if (skipProps[key]) {
-      continue;
-    }
-
-    // things like "messages", "stream"
-    if (reservedProps[key]) {
-      adaptedBody[key] = value;
-      continue;
-    }
-    // anything else, e.g. max_new_tokens
-    adaptedBody.params[key] = value;
-  }
-
-  return adaptedBody;
-}
-
 async function validateModelIsChat(
   modelId: string,
   headers: Record<string, any>
@@ -197,4 +110,21 @@ async function validateModelIsChat(
   return isChatModel;
 }
 
+function constructFailureResponse(message: string, response?: object) {
+  return new Response(
+    JSON.stringify({
+      status: 'failure',
+      message,
+    }),
+    {
+      status: 500,
+      headers: {
+        'content-type': 'application/json',
+      },
+      // override defaults if desired
+      ...response,
+    }
+  );
+}
+
 export default BytezInferenceAPIConfig;
diff --git a/src/providers/bytez/utils.ts b/src/providers/bytez/utils.ts
index 27af31b5b..5f0559a9f 100644
--- a/src/providers/bytez/utils.ts
+++ b/src/providers/bytez/utils.ts
@@ -1,3 +1,6 @@
+import { ParameterConfig } from '../types';
+import { BytezInferenceChatCompleteConfig } from './chatComplete';
+
 class LRUCache<K, V> {
   private size: number;
   private map: Map<K, V>;
@@ -52,4 +55,75 @@ class LRUCache<K, V> {
   }
 }
 
-export { LRUCache };
+function bodyAdapter(requestBody: Record<string, any>) {
+  for (const [param, paramConfig] of Object.entries(
+    BytezInferenceChatCompleteConfig
+  )) {
+    const hasParam = Boolean(requestBody[param]);
+
+    // first assign defaults
+    if (!hasParam) {
+      const { default: defaultValue, required } =
+        paramConfig as ParameterConfig;
+
+      // if it's required, throw
+      if (required) {
+        throw new Error(`Param ${param} is required`);
+      }
+
+      // assign the default value
+      if (defaultValue !== undefined && requestBody[param] === undefined) {
+        requestBody[param] = defaultValue;
+      }
+    }
+  }
+
+  // now we remap everything that has an alias, i.e. "prop" on propConfig
+  for (const key of Object.keys(requestBody)) {
+    const paramObj = BytezInferenceChatCompleteConfig[key] as
+      | ParameterConfig
+      | undefined;
+
+    if (paramObj) {
+      const { param: alias } = paramObj;
+
+      if (key !== alias) {
+        requestBody[alias] = requestBody[key];
+        delete requestBody[key];
+      }
+    }
+  }
+
+  // now we adapt to the bytez input signature
+  // props to skip
+  const skipProps: Record<string, boolean> = {
+    model: true,
+  };
+
+  // props that cannot be removed from the body
+  const reservedProps: Record<string, boolean> = {
+    stream: true,
+    messages: true,
+  };
+
+  const adaptedBody: Record<string, any> = { params: {} };
+
+  for (const [key, value] of Object.entries(requestBody)) {
+    // things like "model"
+    if (skipProps[key]) {
+      continue;
+    }
+
+    // things like "messages", "stream"
+    if (reservedProps[key]) {
+      adaptedBody[key] = value;
+      continue;
+    }
+    // anything else, e.g. max_new_tokens
+    adaptedBody.params[key] = value;
+  }
+
+  return adaptedBody;
+}
+
+export { LRUCache, bodyAdapter };

From d121025b9a18ecb4f98c8b176c60bf0ef937bc19 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Thu, 3 Jul 2025 13:40:32 -0400
Subject: [PATCH 11/15] Remove explicit crypto import.

---
 src/providers/bytez/index.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts
index 982823c45..1991b20e9 100644
--- a/src/providers/bytez/index.ts
+++ b/src/providers/bytez/index.ts
@@ -1,4 +1,3 @@
-import crypto from 'node:crypto';
 import { ProviderConfigs } from '../types';
 import BytezInferenceAPI from './api';
 import { BytezInferenceChatCompleteConfig } from './chatComplete';

From 99bff2d3c99d3efe421d202df00ca66c8f7c16c2 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Thu, 3 Jul 2025 13:41:20 -0400
Subject: [PATCH 12/15] Remove comment from bytez types.

---
 src/providers/bytez/types.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/providers/bytez/types.ts b/src/providers/bytez/types.ts
index 58a586abe..1d640ea8b 100644
--- a/src/providers/bytez/types.ts
+++ b/src/providers/bytez/types.ts
@@ -5,7 +5,6 @@ interface Model {
 interface BytezResponse {
   error: string;
   output: Model[];
-  // add other model properties as needed
 }
 
 export { Model, BytezResponse };

From 86aae1e74da461f3707dee250f78ab11c42a93c7 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Wed, 9 Jul 2025 14:44:33 -0400
Subject: [PATCH 13/15] Make Bytez impl conform to the transform spec per
 maintainer feedback.

---
 src/handlers/handlerUtils.ts        |   7 +-
 src/providers/bytez/api.ts          |   2 +
 src/providers/bytez/chatComplete.ts |   8 +-
 src/providers/bytez/index.ts        | 155 ++++++++--------------------
 src/providers/bytez/utils.ts        | 129 -----------------------
 5 files changed, 47 insertions(+), 254 deletions(-)
 delete mode 100644 src/providers/bytez/utils.ts

diff --git a/src/handlers/handlerUtils.ts b/src/handlers/handlerUtils.ts
index 9584a8a7b..7388d3e15 100644
--- a/src/handlers/handlerUtils.ts
+++ b/src/handlers/handlerUtils.ts
@@ -510,12 +510,7 @@ export async function tryPost(
           body: transformedRequestBody,
           headers: fetchOptions.headers,
         },
-        requestParams: {
-          // in the event transformedRequestBody request is empty, e.g. you have opted to handle requests via a custom requestHandler
-          ...params,
-          // if this is populated, we will overwrite whatever was initially in params
-          ...transformedRequestBody,
-        },
+        requestParams: transformedRequestBody,
         finalUntransformedRequest: {
           body: params,
         },
diff --git a/src/providers/bytez/api.ts b/src/providers/bytez/api.ts
index e95fb36ae..ee86b3231 100644
--- a/src/providers/bytez/api.ts
+++ b/src/providers/bytez/api.ts
@@ -1,4 +1,5 @@
 import { ProviderAPIConfig } from '../types';
+import { version } from '../../../package.json';
 
 const BytezInferenceAPI: ProviderAPIConfig = {
   getBaseURL: () => 'https://api.bytez.com',
@@ -8,6 +9,7 @@ const BytezInferenceAPI: ProviderAPIConfig = {
     const headers: Record<string, string> = {};
 
     headers['Authorization'] = `Key ${apiKey}`;
+    headers['user-agent'] = `portkey-${version}`;
 
     return headers;
   },
diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts
index e7865a66c..519e0a8b4 100644
--- a/src/providers/bytez/chatComplete.ts
+++ b/src/providers/bytez/chatComplete.ts
@@ -6,20 +6,18 @@ const BytezInferenceChatCompleteConfig: ProviderConfig = {
     required: true,
   },
   max_tokens: {
-    // NOTE param acts as an alias, it will be added to "params" on the req body
-    // we do this adaptation ourselves in our custom requestHandler. See src/providers/bytez/index.ts
-    param: 'max_new_tokens',
+    param: 'params.max_new_tokens',
     default: 100,
     min: 0,
   },
   temperature: {
-    param: 'temperature',
+    param: 'params.temperature',
     default: 1,
     min: 0,
     max: 2,
   },
   top_p: {
-    param: 'top_p',
+    param: 'params.top_p',
     default: 1,
     min: 0,
     max: 1,
diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts
index 1991b20e9..581d2ee1e 100644
--- a/src/providers/bytez/index.ts
+++ b/src/providers/bytez/index.ts
@@ -1,129 +1,56 @@
+import { BYTEZ } from '../../globals';
 import { ProviderConfigs } from '../types';
+import { generateErrorResponse } from '../utils';
 import BytezInferenceAPI from './api';
 import { BytezInferenceChatCompleteConfig } from './chatComplete';
-import { bodyAdapter, LRUCache } from './utils';
 import { BytezResponse } from './types';
 
-const BASE_URL = 'https://api.bytez.com/models/v2';
-
-const IS_CHAT_MODEL_CACHE = new LRUCache({ size: 100 });
-
 const BytezInferenceAPIConfig: ProviderConfigs = {
   api: BytezInferenceAPI,
   chatComplete: BytezInferenceChatCompleteConfig,
-  requestHandlers: {
-    chatComplete: async ({ providerOptions, requestBody }) => {
-      try {
-        const { model: modelId } = requestBody;
-
-        const adaptedBody = bodyAdapter(requestBody);
-
-        const headers = {
-          'Content-Type': 'application/json',
-          Authorization: `Key ${providerOptions.apiKey}`,
-        };
-
-        const isChatModel = await validateModelIsChat(modelId, headers);
-
-        if (!isChatModel) {
-          return constructFailureResponse(
-            'Bytez only supports chat models on PortKey',
-            { status: 400 }
-          );
-        }
-
-        const url = `${BASE_URL}/${modelId}`;
-
-        const response = await fetch(url, {
-          method: 'POST',
-          headers,
-          body: JSON.stringify(adaptedBody),
-        });
-
-        if (adaptedBody.stream) {
-          return new Response(response.body, response);
-        }
-
-        const { error, output }: BytezResponse = await response.json();
-
-        if (error) {
-          return constructFailureResponse(error, response);
-        }
-
-        return new Response(
-          JSON.stringify({
-            id: crypto.randomUUID(),
-            object: 'chat.completion',
-            created: Date.now(),
-            model: modelId,
-            choices: [
-              {
-                index: 0,
-                message: output,
-                logprobs: null,
-                finish_reason: 'stop',
-              },
-            ],
-            usage: {
-              inferenceTime: response.headers.get('inference-time'),
-              modelSize: response.headers.get('inference-meter'),
-            },
-          }),
-          response
+  responseTransforms: {
+    chatComplete: (
+      response: BytezResponse,
+      responseStatus: number,
+      responseHeaders: any,
+      strictOpenAiCompliance: boolean,
+      endpoint: string,
+      requestBody: any
+    ) => {
+      const { error, output } = response;
+
+      if (error) {
+        return generateErrorResponse(
+          {
+            message: error,
+            type: String(responseStatus),
+            param: null,
+            code: null,
+          },
+          BYTEZ
         );
-      } catch (error: any) {
-        return constructFailureResponse(error.message);
       }
+
+      return {
+        id: crypto.randomUUID(),
+        object: 'chat.completion',
+        created: Date.now(),
+        model: requestBody.model,
+        choices: [
+          {
+            index: 0,
+            message: output,
+            logprobs: null,
+            finish_reason: 'stop',
+          },
+        ],
+        usage: {
+          inferenceTime: responseHeaders.get('inference-time'),
+          modelSize: responseHeaders.get('inference-meter'),
+        },
+      };
     },
   },
 };
 
-async function validateModelIsChat(
-  modelId: string,
-  headers: Record<string, any>
-) {
-  // return from cache if already validated
-  if (IS_CHAT_MODEL_CACHE.has(modelId)) {
-    return IS_CHAT_MODEL_CACHE.get(modelId);
-  }
-
-  const url = `${BASE_URL}/list/models?modelId=${modelId}`;
-
-  const response = await fetch(url, {
-    headers,
-  });
-
-  const {
-    error,
-    output: [model],
-  }: BytezResponse = await response.json();
-
-  if (error) {
-    throw new Error(error);
-  }
-
-  const isChatModel = model.task === 'chat';
-
-  IS_CHAT_MODEL_CACHE.set(modelId, isChatModel);
-
-  return isChatModel;
-}
-
-function constructFailureResponse(message: string, response?: object) {
-  return new Response(
-    JSON.stringify({
-      status: 'failure',
-      message,
-    }),
-    {
-      status: 500,
-      headers: {
-        'content-type': 'application/json',
-      },
-      // override defaults if desired
-      ...response,
-    }
-  );
-}
-
 export default BytezInferenceAPIConfig;
diff --git a/src/providers/bytez/utils.ts b/src/providers/bytez/utils.ts
deleted file mode 100644
index 5f0559a9f..000000000
--- a/src/providers/bytez/utils.ts
+++ /dev/null
@@ -1,129 +0,0 @@
-import { ParameterConfig } from '../types';
-import { BytezInferenceChatCompleteConfig } from './chatComplete';
-
-class LRUCache<K, V> {
-  private size: number;
-  private map: Map<K, V>;
-
-  constructor({ size = 100 } = {}) {
-    this.size = size;
-    this.map = new Map();
-  }
-
-  get(key: K): V | undefined {
-    if (!this.map.has(key)) return undefined;
-
-    // Move the key to the end to mark it as recently used
-    const value = this.map.get(key)!;
-    this.map.delete(key);
-    this.map.set(key, value);
-    return value;
-  }
-
-  set(key: K, value: V): void {
-    if (this.map.has(key)) {
-      // Remove the old value to update position
-      this.map.delete(key);
-    } else if (this.map.size >= this.size) {
-      // Remove least recently used (first item in Map)
-      const lruKey: any = this.map.keys().next().value;
-      this.map.delete(lruKey);
-    }
-
-    // Insert the new key-value as most recently used
-    this.map.set(key, value);
-  }
-
-  has(key: K): boolean {
-    return this.map.has(key);
-  }
-
-  delete(key: K): boolean {
-    return this.map.delete(key);
-  }
-
-  keys(): IterableIterator<K> {
-    return this.map.keys();
-  }
-
-  values(): IterableIterator<V> {
-    return this.map.values();
-  }
-
-  get length(): number {
-    return this.map.size;
-  }
-}
-
-function bodyAdapter(requestBody: Record<string, any>) {
-  for (const [param, paramConfig] of Object.entries(
-    BytezInferenceChatCompleteConfig
-  )) {
-    const hasParam = Boolean(requestBody[param]);
-
-    // first assign defaults
-    if (!hasParam) {
-      const { default: defaultValue, required } =
-        paramConfig as ParameterConfig;
-
-      // if it's required, throw
-      if (required) {
-        throw new Error(`Param ${param} is required`);
-      }
-
-      // assign the default value
-      if (defaultValue !== undefined && requestBody[param] === undefined) {
-        requestBody[param] = defaultValue;
-      }
-    }
-  }
-
-  // now we remap everything that has an alias, i.e. "prop" on propConfig
-  for (const key of Object.keys(requestBody)) {
-    const paramObj = BytezInferenceChatCompleteConfig[key] as
-      | ParameterConfig
-      | undefined;
-
-    if (paramObj) {
-      const { param: alias } = paramObj;
-
-      if (key !== alias) {
-        requestBody[alias] = requestBody[key];
-        delete requestBody[key];
-      }
-    }
-  }
-
-  // now we adapt to the bytez input signature
-  // props to skip
-  const skipProps: Record<string, boolean> = {
-    model: true,
-  };
-
-  // props that cannot be removed from the body
-  const reservedProps: Record<string, boolean> = {
-    stream: true,
-    messages: true,
-  };
-
-  const adaptedBody: Record<string, any> = { params: {} };
-
-  for (const [key, value] of Object.entries(requestBody)) {
-    // things like "model"
-    if (skipProps[key]) {
-      continue;
-    }
-
-    // things like "messages", "stream"
-    if (reservedProps[key]) {
-      adaptedBody[key] = value;
-      continue;
-    }
-    // anything else, e.g. max_new_tokens
-    adaptedBody.params[key] = value;
-  }
-
-  return adaptedBody;
-}
-
-export { LRUCache, bodyAdapter };

From a6070dc052a49f1b4b05f14ec46fe79beb5645a6 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Wed, 9 Jul 2025 15:08:03 -0400
Subject: [PATCH 14/15] Update user agent string for bytez provider config.

---
 src/providers/bytez/api.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/providers/bytez/api.ts b/src/providers/bytez/api.ts
index ee86b3231..c54ab546e 100644
--- a/src/providers/bytez/api.ts
+++ b/src/providers/bytez/api.ts
@@ -9,7 +9,7 @@ const BytezInferenceAPI: ProviderAPIConfig = {
     const headers: Record<string, string> = {};
 
     headers['Authorization'] = `Key ${apiKey}`;
-    headers['user-agent'] = `portkey-${version}`;
+    headers['user-agent'] = `portkey/${version}`;
 
     return headers;
   },

From 73f5c05cb37ecf9e29333023c8cb5dcc8f1a7cd7 Mon Sep 17 00:00:00 2001
From: Aaron Vogler <aaronvogler@gmail.com>
Date: Fri, 11 Jul 2025 14:48:37 -0400
Subject: [PATCH 15/15] Move bytez chatComplete responseTransform into
 chatComplete.ts, conform to openai compliant spec.

---
 src/providers/bytez/chatComplete.ts | 48 ++++++++++++++++++++++++++++-
 src/providers/bytez/index.ts        | 47 ++--------------------------
 2 files changed, 49 insertions(+), 46 deletions(-)

diff --git a/src/providers/bytez/chatComplete.ts b/src/providers/bytez/chatComplete.ts
index 519e0a8b4..5c2c1756f 100644
--- a/src/providers/bytez/chatComplete.ts
+++ b/src/providers/bytez/chatComplete.ts
@@ -1,4 +1,7 @@
+import { BYTEZ } from '../../globals';
 import { ProviderConfig } from '../types';
+import { BytezResponse } from './types';
+import { generateErrorResponse } from '../utils';
 
 const BytezInferenceChatCompleteConfig: ProviderConfig = {
   messages: {
@@ -28,4 +31,63 @@ const BytezInferenceChatCompleteConfig: ProviderConfig = {
   },
 };
 
-export { BytezInferenceChatCompleteConfig };
+/**
+ * Response transform for Bytez chat completions: maps the provider payload
+ * onto an OpenAI-style `chat.completion` object.
+ *
+ * @param response - Bytez response body; only `error` and `output` are read.
+ * @param responseStatus - Provider HTTP status, stringified into the error `type`.
+ * @param responseHeaders - Not read here; kept for the positional signature.
+ * @param strictOpenAiCompliance - Not read here; kept for the positional signature.
+ * @param endpoint - Not read here; kept for the positional signature.
+ * @param requestBody - Originating request; its `model` is echoed in the result.
+ * @returns The `generateErrorResponse` result when `error` is set, otherwise the
+ *   `chat.completion` object.
+ */
+function chatComplete(
+  response: BytezResponse,
+  responseStatus: number,
+  responseHeaders: any,
+  strictOpenAiCompliance: boolean,
+  endpoint: string,
+  requestBody: any
+) {
+  const { error, output } = response;
+
+  if (error) {
+    return generateErrorResponse(
+      {
+        message: error,
+        type: String(responseStatus),
+        param: null,
+        code: null,
+      },
+      BYTEZ
+    );
+  }
+
+  return {
+    id: crypto.randomUUID(),
+    object: 'chat.completion',
+    // OpenAI defines `created` as a Unix timestamp in seconds, while
+    // Date.now() yields milliseconds, so scale it down.
+    created: Math.floor(Date.now() / 1000),
+    model: requestBody.model,
+    choices: [
+      {
+        index: 0,
+        message: output,
+        logprobs: null,
+        finish_reason: 'stop',
+      },
+    ],
+    // -1 is a placeholder: this transform reads no token counts from Bytez.
+    usage: {
+      completion_tokens: -1,
+      prompt_tokens: -1,
+      total_tokens: -1,
+    },
+  };
+}
+
+export { BytezInferenceChatCompleteConfig, chatComplete };
diff --git a/src/providers/bytez/index.ts b/src/providers/bytez/index.ts
index 581d2ee1e..2b1782bec 100644
--- a/src/providers/bytez/index.ts
+++ b/src/providers/bytez/index.ts
@@ -1,55 +1,12 @@
-import { BYTEZ } from '../../globals';
 import { ProviderConfigs } from '../types';
-import { generateErrorResponse } from '../utils';
 import BytezInferenceAPI from './api';
-import { BytezInferenceChatCompleteConfig } from './chatComplete';
-import { BytezResponse } from './types';
+import { BytezInferenceChatCompleteConfig, chatComplete } from './chatComplete';
 
 const BytezInferenceAPIConfig: ProviderConfigs = {
   api: BytezInferenceAPI,
   chatComplete: BytezInferenceChatCompleteConfig,
   responseTransforms: {
-    chatComplete: (
-      response: BytezResponse,
-      responseStatus: number,
-      responseHeaders: any,
-      strictOpenAiCompliance: boolean,
-      endpoint: string,
-      requestBody: any
-    ) => {
-      const { error, output } = response;
-
-      if (error) {
-        return generateErrorResponse(
-          {
-            message: error,
-            type: String(responseStatus),
-            param: null,
-            code: null,
-          },
-          BYTEZ
-        );
-      }
-
-      return {
-        id: crypto.randomUUID(),
-        object: 'chat.completion',
-        created: Date.now(),
-        model: requestBody.model,
-        choices: [
-          {
-            index: 0,
-            message: output,
-            logprobs: null,
-            finish_reason: 'stop',
-          },
-        ],
-        usage: {
-          inferenceTime: responseHeaders.get('inference-time'),
-          modelSize: responseHeaders.get('inference-meter'),
-        },
-      };
-    },
+    chatComplete,
   },
 };