From 9ee335821dd1bb003e736b47b1f19edca700be6c Mon Sep 17 00:00:00 2001
From: Asad Memon <asad.lionpk@gmail.com>
Date: Tue, 16 Jan 2024 18:46:14 -0800
Subject: [PATCH 1/4] demo: added mixtral as an option

---
 website/components/editor.jsx     | 36 ++++++++++++++-
 website/package-lock.json         | 14 ++++++
 website/package.json              |  1 +
 website/pages/api/autocomplete.js | 74 ++++++++++++++++++++++++++++++-
 4 files changed, 123 insertions(+), 2 deletions(-)
diff --git a/website/components/editor.jsx b/website/components/editor.jsx
index 002c197..bcdb45f 100644
--- a/website/components/editor.jsx
+++ b/website/components/editor.jsx
@@ -19,9 +19,12 @@ const DEFAULTCODE = `function add(num1, num2){
 
 function CodeEditor() {
   const [model, setModel] = useState("gpt-3.5-turbo-1106");
+  const [delay, setDelay] = useState(500);
   const [acceptOnClick, setAcceptOnClick] = useState(true);
   return (
     <>
+      <div className="flex items-center gap-2">
+        <label>Model</label>
       <Select
         value={model}
         onValueChange={(value) => {
@@ -36,6 +39,9 @@ function CodeEditor() {
           <SelectItem value="gpt-3.5-turbo-1106">
             GPT 3.5 Turbo <Badge variant="secondary">recommended</Badge>
           </SelectItem>
+          <SelectItem value="mixtral-8x7b">
+            Mixtral MoE 8x7B Instruct <Badge variant="secondary">best open source</Badge>
+          </SelectItem>
           <SelectItem value="codellama">
             Code Llama <Badge variant="secondary">buggy</Badge>
           </SelectItem>
@@ -44,6 +50,34 @@ function CodeEditor() {
           </SelectItem>
         </SelectContent>
       </Select>
+
+      <label className="ml-2">Delay</label>
+      <Select
+        value={delay}
+        onValueChange={(value) => {
+          setDelay(value);
+          clearLocalCache();
+        }}
+      >
+        <SelectTrigger className="w-[180px]">
+          <SelectValue placeholder="Delay" />
+        </SelectTrigger>
+        <SelectContent>
+          <SelectItem value={500}>
+            500ms <Badge variant="secondary">recommended</Badge>
+          </SelectItem>
+          <SelectItem value={1000}>
+            1000ms <Badge variant="secondary">comfy</Badge>
+          </SelectItem>
+          <SelectItem value={100}>
+            100ms <Badge variant="destructive">psycho mode</Badge>
+          </SelectItem>
+          <SelectItem value={50}>
+            50ms <Badge variant="destructive">psycho's mom mode</Badge>
+          </SelectItem>
+        </SelectContent>
+      </Select>
+      </div>
       <CodeMirror
         style={{
           fontSize: "17px",
@@ -80,7 +114,7 @@ function CodeEditor() {
               const { prediction } = await res.json();
               return prediction;
             },
-            500,
+            delay,
             acceptOnClick,
           ),
         ]}
diff --git a/website/package-lock.json b/website/package-lock.json
index cae53bd..fc1d1b0 100644
--- a/website/package-lock.json
+++ b/website/package-lock.json
@@ -18,6 +18,7 @@
         "@uiw/react-codemirror": "^4.21.21",
         "class-variance-authority": "^0.7.0",
         "clsx": "^2.0.0",
+        "extract-json-from-string": "^1.0.1",
         "lucide-react": "^0.294.0",
         "next": "14.0.4",
         "openai": "^4.21.0",
@@ -1456,6 +1457,14 @@
         "node": ">=6"
       }
     },
+    "node_modules/extract-json-from-string": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/extract-json-from-string/-/extract-json-from-string-1.0.1.tgz",
+      "integrity": "sha512-xfQOSFYbELVs9QVkKsV9FZAjlAmXQ2SLR6FpfFX1kpn4QAvaGBJlrnVOblMLwrLPYc26H+q9qxo6JTd4E7AwgQ==",
+      "engines": {
+        "node": ">=0.10.0"
+      }
+    },
     "node_modules/fast-glob": {
       "version": "3.3.2",
       "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz",
@@ -3610,6 +3619,11 @@
       "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
       "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ=="
     },
+    "extract-json-from-string": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/extract-json-from-string/-/extract-json-from-string-1.0.1.tgz",
+      "integrity": "sha512-xfQOSFYbELVs9QVkKsV9FZAjlAmXQ2SLR6FpfFX1kpn4QAvaGBJlrnVOblMLwrLPYc26H+q9qxo6JTd4E7AwgQ=="
+    },
     "fast-glob": {
       "version": "3.3.2",
       "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz",
diff --git a/website/package.json b/website/package.json
index ab6962d..a1179ed 100644
--- a/website/package.json
+++ b/website/package.json
@@ -20,6 +20,7 @@
     "@uiw/react-codemirror": "^4.21.21",
     "class-variance-authority": "^0.7.0",
     "clsx": "^2.0.0",
+    "extract-json-from-string": "^1.0.1",
     "lucide-react": "^0.294.0",
     "next": "14.0.4",
     "openai": "^4.21.0",
diff --git a/website/pages/api/autocomplete.js b/website/pages/api/autocomplete.js
index 6223a84..14c76f7 100644
--- a/website/pages/api/autocomplete.js
+++ b/website/pages/api/autocomplete.js
@@ -1,9 +1,81 @@
 import OpenAI from "openai";
+import extract from "extract-json-from-string";
 
 const openai = new OpenAI({
   apiKey: process.env.OPENAI_API_KEY,
 });
 
+async function completionMixtral(prefix, suffix, language) {
+  const response = await fetch(
+    `https://api.fireworks.ai/inference/v1/chat/completions`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Accept': 'text/event-stream',
+      'Authorization': `Bearer ${process.env.FIREWORKS_API_KEY}`,
+    },
+    body: JSON.stringify({
+      model: "accounts/fireworks/models/mixtral-8x7b-instruct",
+      n: 1,
+      messages: [
+          {
+              role: "user",
+              content: "You are a " + (language || "") +" programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown. Output JSON in this structure: {\"r\": \"...\"}\n```\n" + prefix + "<FILL_ME>" + suffix + "\n```",
+          },
+      ],
+      stop: [
+          "<|im_start|>",
+          "<|im_end|>",
+          "<|endoftext|>"
+      ],
+      top_p: 1,
+      top_k: 40,
+      presence_penalty: 0,
+      frequency_penalty: 0,
+      prompt_truncate_len: 1024,
+      context_length_exceeded_behavior: "truncate",
+      temperature: 0.9,
+      max_tokens: 50
+    }),
+  });
+
+  const wholeOutput = await response.json();
+  const outputJsonRaw = wholeOutput?.choices[0]?.message?.content;
+  try {
+    return extract(outputJsonRaw)[0].r;
+  }
+  catch (e) {
+    return "";
+  }
+}
+
+function completionLlama(prefix, suffix, language) {
+  const url = "https://api.fireworks.ai/inference/v1/completions";
+  const apiKey = process.env.FIREWORKS_API_KEY;
+  const headers = {
+    'Authorization': `Bearer ${apiKey}`,
+    'Content-Type': 'application/json'
+  };
+
+  const body = {
+    "model": "accounts/fireworks/models/llama-v2-7b",
+    "prompt": `You are a javascript programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown. Output JSON in this structure: {r: ""}
+    \`\`\`
+    ${prefix}<FILL_ME>${suffix}
+    \`\`\``
+  };
+
+  return fetch(url, {
+    method: 'POST',
+    headers: headers,
+    body: JSON.stringify(body)
+  })
+  .then(response => response.json())
+  .catch(error => {
+    console.error('Error:', error);
+  });
+}
+
 async function completionLlama(prefix, suffix, language){
   try {
     const response = await fetch(
@@ -40,7 +112,7 @@ async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", lang
 
 export default async function handler(req, res) {
   const { prefix, suffix, model, language } = req.body;
-  const completionMethod = model == "codellama" ? completionLlama : completionOpenAI;
+  const completionMethod = model == "codellama" ? completionLlama : (model==="mixtral-8x7b"? completionMixtral : completionOpenAI);
   const prediction = await completionMethod(prefix, suffix, model, language);
   console.log(model, prediction)
   res.status(200).json({ prediction })

From 16320fd43630f9186e4fed5b3fbea32fdd08b94c Mon Sep 17 00:00:00 2001
From: Asad Memon <asad.lionpk@gmail.com>
Date: Tue, 16 Jan 2024 18:49:47 -0800
Subject: [PATCH 2/4] remove duplicate completionLlama definition

---
 website/pages/api/autocomplete.js | 27 ---------------------------
 1 file changed, 27 deletions(-)

diff --git a/website/pages/api/autocomplete.js b/website/pages/api/autocomplete.js
index 14c76f7..07ba7d5 100644
--- a/website/pages/api/autocomplete.js
+++ b/website/pages/api/autocomplete.js
@@ -49,33 +49,6 @@ async function completionMixtral(prefix, suffix, language) {
   }
 }
 
-function completionLlama(prefix, suffix, language) {
-  const url = "https://api.fireworks.ai/inference/v1/completions";
-  const apiKey = process.env.FIREWORKS_API_KEY;
-  const headers = {
-    'Authorization': `Bearer ${apiKey}`,
-    'Content-Type': 'application/json'
-  };
-
-  const body = {
-    "model": "accounts/fireworks/models/llama-v2-7b",
-    "prompt": `You are a javascript programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown. Output JSON in this structure: {r: ""}
-    \`\`\`
-    ${prefix}<FILL_ME>${suffix}
-    \`\`\``
-  };
-
-  return fetch(url, {
-    method: 'POST',
-    headers: headers,
-    body: JSON.stringify(body)
-  })
-  .then(response => response.json())
-  .catch(error => {
-    console.error('Error:', error);
-  });
-}
-
 async function completionLlama(prefix, suffix, language){
   try {
     const response = await fetch(

From fc1fff0c6174b3de34b3e88fd17b5cb54ef7eb19 Mon Sep 17 00:00:00 2001
From: Asad Memon <asad.lionpk@gmail.com>
Date: Tue, 16 Jan 2024 20:38:47 -0800
Subject: [PATCH 3/4] json with cleanup and retry

---
 website/pages/api/autocomplete.js | 68 ++++++++++++++++++++++++++-----
 1 file changed, 58 insertions(+), 10 deletions(-)

diff --git a/website/pages/api/autocomplete.js b/website/pages/api/autocomplete.js
index 07ba7d5..9f3bbbc 100644
--- a/website/pages/api/autocomplete.js
+++ b/website/pages/api/autocomplete.js
@@ -5,7 +5,56 @@ const openai = new OpenAI({
   apiKey: process.env.OPENAI_API_KEY,
 });
 
-async function completionMixtral(prefix, suffix, language) {
+function removeOverlapPrefixSuffix(text, prefix, suffix) {
+  // Remove overlapping part from the start (prefix)
+  let commonPrefixLength = 0;
+  for (let i = 0; i < prefix.length; i++) {
+      if (text.startsWith(prefix.slice(i))) {
+          commonPrefixLength = prefix.length - i;
+          break;
+      }
+  }
+  if (commonPrefixLength > 0) {
+      text = text.slice(commonPrefixLength);
+  }
+
+  // Remove overlapping part from the end (suffix)
+  let commonSuffixLength = 0;
+  for (let i = 0; i < suffix.length; i++) {
+      if (text.endsWith(suffix.substring(0, i + 1))) {
+          commonSuffixLength = i + 1;
+          break;
+      }
+  }
+  if (commonSuffixLength > 0) {
+      text = text.slice(0, -commonSuffixLength);
+  }
+
+  return text.trim();
+}
+
+async function completionMixtralWithCleanup(prefix, suffix, language){
+  const text = await completionMixtral(prefix, suffix, language);
+  return removeOverlapPrefixSuffix(text, prefix, suffix);
+}
+
+async function completionMixtral(prefix, suffix, language, previousOutput) {
+  let messages = [
+      {
+          role: "user",
+          content: "You are a " + (language || "") +" programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown. ```\n" + prefix + "<FILL_ME>" + suffix + "\n```" + "\nOutput: JSON in this structure: {\"r\": \"...\"}\n",
+      },
+  ];
+  if (previousOutput) {
+    messages.push({
+      role: "assistant",
+      content: previousOutput,
+    });
+    messages.push({
+      role: "user",
+      content: "The previous output was not formatted correctly. Please try again. Output should be JSON in this structure: {\"r\": \"...\"}",
+    });
+  }
   const response = await fetch(
     `https://api.fireworks.ai/inference/v1/chat/completions`, {
     method: 'POST',
@@ -17,24 +66,19 @@ async function completionMixtral(prefix, suffix, language) {
     body: JSON.stringify({
       model: "accounts/fireworks/models/mixtral-8x7b-instruct",
       n: 1,
-      messages: [
-          {
-              role: "user",
-              content: "You are a " + (language || "") +" programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown. Output JSON in this structure: {\"r\": \"...\"}\n```\n" + prefix + "<FILL_ME>" + suffix + "\n```",
-          },
-      ],
+      messages: messages,
       stop: [
           "<|im_start|>",
           "<|im_end|>",
           "<|endoftext|>"
       ],
       top_p: 1,
-      top_k: 40,
+      top_k: 30,
       presence_penalty: 0,
       frequency_penalty: 0,
       prompt_truncate_len: 1024,
       context_length_exceeded_behavior: "truncate",
-      temperature: 0.9,
+      temperature: 0.8,
       max_tokens: 50
     }),
   });
@@ -45,6 +89,10 @@ async function completionMixtral(prefix, suffix, language) {
     return extract(outputJsonRaw)[0].r;
   }
   catch (e) {
+    console.log(e, outputJsonRaw);
+    if (!previousOutput) {
+      return await completionMixtral(prefix, suffix, language, outputJsonRaw);
+    }
     return "";
   }
 }
@@ -85,7 +133,7 @@ async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", lang
 
 export default async function handler(req, res) {
   const { prefix, suffix, model, language } = req.body;
-  const completionMethod = model == "codellama" ? completionLlama : (model==="mixtral-8x7b"? completionMixtral : completionOpenAI);
+  const completionMethod = model == "codellama" ? completionLlama : (model==="mixtral-8x7b"? completionMixtralWithCleanup : completionOpenAI);
   const prediction = await completionMethod(prefix, suffix, model, language);
   console.log(model, prediction)
   res.status(200).json({ prediction })

From 5a22aa0d28eec66618ae298b380cf4792425e112 Mon Sep 17 00:00:00 2001
From: Asad Memon <asad.lionpk@gmail.com>
Date: Tue, 16 Jan 2024 22:29:10 -0800
Subject: [PATCH 4/4] fixes to mistral, reorg

---
 website/lib/backends/llama.js     |  19 ++++
 website/lib/backends/mistral.js   | 122 ++++++++++++++++++++++++++
 website/lib/backends/openai.js    |  20 +++++
 website/pages/api/autocomplete.js | 141 ++----------------------------
 4 files changed, 167 insertions(+), 135 deletions(-)
 create mode 100644 website/lib/backends/llama.js
 create mode 100644 website/lib/backends/mistral.js
 create mode 100644 website/lib/backends/openai.js

diff --git a/website/lib/backends/llama.js b/website/lib/backends/llama.js
new file mode 100644
index 0000000..263b9d6
--- /dev/null
+++ b/website/lib/backends/llama.js
@@ -0,0 +1,35 @@
+
+/**
+ * Requests a code completion from Code Llama through the Cloudflare AI run endpoint.
+ *
+ * The prompt frames the code with <PRE>/<SUF>/<MID> markers. The parameter order
+ * matches completionOpenAI because the autocomplete handler calls every backend
+ * as (prefix, suffix, model, language).
+ *
+ * @param {string} prefix - Code before the cursor.
+ * @param {string} suffix - Code after the cursor.
+ * @param {string} [_model] - Model id from the request; unused, the model is fixed in the URL.
+ * @param {string} [language] - Programming language named in the prompt when provided.
+ * @returns {Promise<string|undefined>} The generated text, or undefined when the
+ *   request or response parsing fails (the error is logged).
+ */
+export async function completionLlama(prefix, suffix, _model, language){
+  try {
+    // NOTE(review): the env var is spelled CLOUDLFARE_ID (not CLOUDFLARE_ID);
+    // presumably it matches the deployment config - confirm before renaming.
+    const response = await fetch(
+      `https://api.cloudflare.com/client/v4/accounts/${process.env.CLOUDLFARE_ID}/ai/run/@hf/thebloke/codellama-7b-instruct-awq`, {
+      method: 'POST',
+      headers: {
+        'Authorization': `Bearer ${process.env.CLOUDFLARE_KEY}`,
+        'Content-Type': 'application/json'
+      },
+      body: JSON.stringify({ "prompt": `You are a ${language?(language + " "):""}programmer. Do not add any explanation or markdown. <PRE>${prefix}<SUF>${suffix}<MID>`, "max_tokens": 30 })
+    });
+
+    const data = await response.json();
+    return data.result.response;
+  } catch (error) {
+    console.error('Error:', error);
+  }
+}
\ No newline at end of file
diff --git a/website/lib/backends/mistral.js b/website/lib/backends/mistral.js
new file mode 100644
index 0000000..0133b77
--- /dev/null
+++ b/website/lib/backends/mistral.js
@@ -0,0 +1,183 @@
+
+/**
+ * Collects the contents of the fenced (```) code blocks in a markdown reply.
+ *
+ * @param {string} markdownText - Raw text returned by the model.
+ * @returns {string[]} One trimmed entry per fenced block, with the fences and an
+ *   optional lowercase language tag removed. When there is no fenced block the
+ *   whole reply is returned, unchanged, as the only entry.
+ */
+function extractCodeSegments(markdownText) {
+  // Regular expression to match code blocks (optionally including a language specifier)
+  const codeBlockRegex = /```[a-z]*[\s\S]*?```/g;
+
+  // Find matches for the regex in the provided markdown text
+  const matches = markdownText.match(codeBlockRegex);
+
+  if (matches) {
+    // Remove the backticks and the optional language specifier, then trim whitespace
+    return matches.map(match => match.replace(/```[a-z]*\n?/, '').replace(/```/, '').trim());
+  }
+  // No fenced block: fall back to treating the whole reply as the code
+  return [markdownText];
+}
+
+/**
+ * Strips the echoed prefix and suffix from a completed snippet so that only the
+ * text replacing <FILL_ME> remains.
+ *
+ * The longest tail of `prefix` found at the start of `text` and the longest head
+ * of `suffix` found at the end of the remaining text are removed, so a partially
+ * echoed prefix or suffix is tolerated.
+ *
+ * @param {string} text - Completed code returned by the model.
+ * @param {string} prefix - Code before the cursor.
+ * @param {string} suffix - Code after the cursor.
+ * @returns {string} The text left between the echoed prefix and suffix.
+ * @throws {Error} "prefix not found" or "suffix not found" when a non-blank
+ *   prefix or suffix does not overlap the text at all.
+ */
+function removeOverlapPrefixSuffix(text, prefix, suffix) {
+  // Longest tail of the prefix that the text starts with
+  let commonPrefixLength = 0;
+  for (let i = 0; i < prefix.length; i++) {
+    if (text.startsWith(prefix.slice(i))) {
+      commonPrefixLength = prefix.length - i;
+      break;
+    }
+  }
+  // An empty or whitespace-only prefix (cursor at the start of the document)
+  // leaves nothing to echo, so only a missing non-blank prefix is an error.
+  if (commonPrefixLength === 0 && prefix.trim() !== "") {
+    throw new Error("prefix not found");
+  }
+  const withoutPrefix = text.slice(commonPrefixLength);
+
+  // Longest head of the suffix that the remaining text ends with. Candidates are
+  // tried longest first: stopping at the shortest match would strip only one
+  // ")" of a "))" suffix and leak the other into the completion.
+  let commonSuffixLength = 0;
+  for (let length = suffix.length; length > 0; length--) {
+    if (withoutPrefix.endsWith(suffix.slice(0, length))) {
+      commonSuffixLength = length;
+      break;
+    }
+  }
+  // Same rule as for the prefix: a blank suffix (cursor at the end of the
+  // document) has nothing to strip.
+  if (commonSuffixLength === 0 && suffix.trim() !== "") {
+    throw new Error("suffix not found");
+  }
+
+  // Explicit end index: slice(0, -0) would return "" instead of the whole text
+  return withoutPrefix.slice(0, withoutPrefix.length - commonSuffixLength);
+}
+
+/**
+ * Asks Mixtral 8x7B Instruct (Fireworks chat completions API) to fill the
+ * <FILL_ME> gap between `prefix` and `suffix`.
+ *
+ * The model is told to echo the prefix and suffix inside a markdown code block;
+ * the reply is then reduced to the inserted text. When a first reply cannot be
+ * reduced, it is sent back once together with a correction request.
+ *
+ * @param {string} prefix - Code before the cursor.
+ * @param {string} suffix - Code after the cursor.
+ * @param {string} [language] - Programming language named in the prompt.
+ * @param {string} [previousOutput] - Rejected reply from a first attempt; when
+ *   set, this call is the corrective attempt and does not retry again.
+ * @returns {Promise<string>} The inserted text, or "" when no usable reply was obtained.
+ */
+async function completionMixtral(prefix, suffix, language, previousOutput) {
+  const messages = [
+    {
+      role: "user",
+      content: "You are a " + (language || "") +" programmer that replaces <FILL_ME> part with the right code. ALWAYS INCLUDE PREFIX AND SUFFIX in the completed code.\n Do not format code, leave prefix and suffix as-is, only replace <FILL_ME> part, do not include any code comments. ```\n" + prefix + "<FILL_ME>" + suffix + "\n```" + "\nPut output in markdown\n",
+    },
+  ];
+  if (previousOutput) {
+    messages.push({
+      role: "assistant",
+      content: previousOutput,
+    });
+    messages.push({
+      role: "user",
+      content: "The previous output was not formatted correctly. Please try again. Output should be in markdown code block and should include prefix and suffix.",
+    });
+  }
+  const response = await fetch(
+    `https://api.fireworks.ai/inference/v1/chat/completions`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Accept': 'text/event-stream',
+      'Authorization': `Bearer ${process.env.FIREWORKS_API_KEY}`,
+    },
+    body: JSON.stringify({
+      model: "accounts/fireworks/models/mixtral-8x7b-instruct",
+      n: 1,
+      messages: messages,
+      stop: [
+          "<|im_start|>",
+          "<|im_end|>",
+          "<|endoftext|>"
+      ],
+      top_p: 1,
+      top_k: 40,
+      presence_penalty: 0,
+      frequency_penalty: 0,
+      prompt_truncate_len: 1024,
+      context_length_exceeded_behavior: "truncate",
+      temperature: 0.9,
+      max_tokens: 150
+    }),
+  });
+
+  const wholeOutput = await response.json();
+  // `choices` may be missing when the API returns no completion, so every step of the path is optional.
+  const outputRaw = wholeOutput?.choices?.[0]?.message?.content;
+  if (typeof outputRaw !== "string" || outputRaw === "") {
+    // Nothing to parse or to quote back in a correction request. Returning here
+    // also keeps the retry below from recursing again with an empty previousOutput.
+    return "";
+  }
+
+  try {
+    // extract markdown code part
+    const codeItself = extractCodeSegments(outputRaw)[0];
+    // check if <FILL_ME> is still there
+    if (codeItself.includes("<FILL_ME")) {
+      throw new Error("fill me still there");
+    }
+    return removeOverlapPrefixSuffix(codeItself, prefix, suffix);
+  }
+  catch (e) {
+    // The reply could not be reduced to the inserted text: ask once for a corrected reply.
+    if (!previousOutput) {
+      return await completionMixtral(prefix, suffix, language, outputRaw);
+    }
+    return "";
+  }
+}
+
+/**
+ * Backend entry point for the autocomplete handler: calls completionMixtral
+ * until it yields a non-empty completion or `retries` attempts are used up.
+ *
+ * The parameter order matches the handler's call, (prefix, suffix, model,
+ * language), so `language` receives the language and not the model id.
+ *
+ * @param {string} prefix - Code before the cursor.
+ * @param {string} suffix - Code after the cursor.
+ * @param {string} [_model] - Model id from the request; unused.
+ * @param {string} [language] - Programming language named in the prompt.
+ * @param {number} [retries=5] - Maximum number of attempts (each may send up to two requests).
+ * @returns {Promise<string>} The first non-empty completion, or "" if every attempt fails.
+ */
+export async function completionMixtralWithRetries(prefix, suffix, _model, language, retries=5){
+  for (let attempt = 0; attempt < retries; attempt++) {
+    const output = await completionMixtral(prefix, suffix, language);
+    if (output) return output;
+  }
+  return "";
+}
diff --git a/website/lib/backends/openai.js b/website/lib/backends/openai.js
new file mode 100644
index 0000000..c2cd053
--- /dev/null
+++ b/website/lib/backends/openai.js
@@ -0,0 +1,29 @@
+import OpenAI from "openai";
+
+const openai = new OpenAI({
+  apiKey: process.env.OPENAI_API_KEY,
+});
+
+/**
+ * Requests a completion for the <FILL_ME> gap from an OpenAI chat model.
+ *
+ * @param {string} prefix - Code before the cursor.
+ * @param {string} suffix - Code after the cursor.
+ * @param {string} [model="gpt-3.5-turbo-1106"] - OpenAI chat model id.
+ * @param {string} [language] - Programming language named in the system prompt when provided.
+ * @returns {Promise<string>} The content of the first choice's message.
+ */
+export async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", language){
+  const languageQualifier = language ? (language + " ") : "";
+  const systemMessage = {
+    role: "system",
+    content: `You are a ${languageQualifier}programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown.`,
+  };
+  const userMessage = { role: "user", content: `${prefix}<FILL_ME>${suffix}` };
+
+  const { choices } = await openai.chat.completions.create({
+    messages: [systemMessage, userMessage],
+    model,
+  });
+  return choices[0].message.content;
+}
\ No newline at end of file
diff --git a/website/pages/api/autocomplete.js b/website/pages/api/autocomplete.js
index 9f3bbbc..e71afa8 100644
--- a/website/pages/api/autocomplete.js
+++ b/website/pages/api/autocomplete.js
@@ -1,140 +1,11 @@
-import OpenAI from "openai";
-import extract from "extract-json-from-string";
-
-const openai = new OpenAI({
-  apiKey: process.env.OPENAI_API_KEY,
-});
-
-function removeOverlapPrefixSuffix(text, prefix, suffix) {
-  // Remove overlapping part from the start (prefix)
-  let commonPrefixLength = 0;
-  for (let i = 0; i < prefix.length; i++) {
-      if (text.startsWith(prefix.slice(i))) {
-          commonPrefixLength = prefix.length - i;
-          break;
-      }
-  }
-  if (commonPrefixLength > 0) {
-      text = text.slice(commonPrefixLength);
-  }
-
-  // Remove overlapping part from the end (suffix)
-  let commonSuffixLength = 0;
-  for (let i = 0; i < suffix.length; i++) {
-      if (text.endsWith(suffix.substring(0, i + 1))) {
-          commonSuffixLength = i + 1;
-          break;
-      }
-  }
-  if (commonSuffixLength > 0) {
-      text = text.slice(0, -commonSuffixLength);
-  }
-
-  return text.trim();
-}
-
-async function completionMixtralWithCleanup(prefix, suffix, language){
-  const text = await completionMixtral(prefix, suffix, language);
-  return removeOverlapPrefixSuffix(text, prefix, suffix);
-}
-
-async function completionMixtral(prefix, suffix, language, previousOutput) {
-  let messages = [
-      {
-          role: "user",
-          content: "You are a " + (language || "") +" programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown. ```\n" + prefix + "<FILL_ME>" + suffix + "\n```" + "\nOutput: JSON in this structure: {\"r\": \"...\"}\n",
-      },
-  ];
-  if (previousOutput) {
-    messages.push({
-      role: "assistant",
-      content: previousOutput,
-    });
-    messages.push({
-      role: "user",
-      content: "The previous output was not formatted correctly. Please try again. Output should be JSON in this structure: {\"r\": \"...\"}",
-    });
-  }
-  const response = await fetch(
-    `https://api.fireworks.ai/inference/v1/chat/completions`, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      'Accept': 'text/event-stream',
-      'Authorization': `Bearer ${process.env.FIREWORKS_API_KEY}`,
-    },
-    body: JSON.stringify({
-      model: "accounts/fireworks/models/mixtral-8x7b-instruct",
-      n: 1,
-      messages: messages,
-      stop: [
-          "<|im_start|>",
-          "<|im_end|>",
-          "<|endoftext|>"
-      ],
-      top_p: 1,
-      top_k: 30,
-      presence_penalty: 0,
-      frequency_penalty: 0,
-      prompt_truncate_len: 1024,
-      context_length_exceeded_behavior: "truncate",
-      temperature: 0.8,
-      max_tokens: 50
-    }),
-  });
-
-  const wholeOutput = await response.json();
-  const outputJsonRaw = wholeOutput?.choices[0]?.message?.content;
-  try {
-    return extract(outputJsonRaw)[0].r;
-  }
-  catch (e) {
-    console.log(e, outputJsonRaw);
-    if (!previousOutput) {
-      return await completionMixtral(prefix, suffix, language, outputJsonRaw);
-    }
-    return "";
-  }
-}
-
-async function completionLlama(prefix, suffix, language){
-  try {
-    const response = await fetch(
-      `https://api.cloudflare.com/client/v4/accounts/${process.env.CLOUDLFARE_ID}/ai/run/@hf/thebloke/codellama-7b-instruct-awq`, {
-      method: 'POST',
-      headers: {
-        'Authorization': `Bearer ${process.env.CLOUDFLARE_KEY}`,
-        'Content-Type': 'application/json'
-      },
-      body: JSON.stringify({ "prompt": `You are a ${language?(language + " "):""}programmer. Do not add any explanation or markdown. <PRE>${prefix}<SUF>${suffix}<MID>`, "max_tokens": 30 })
-    });
-    
-    const data = await response.json();
-    return data.result.response;
-  } catch (error) {
-    console.error('Error:', error);
-  }
-}
-
-async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", language){
-  const chatCompletion = await openai.chat.completions.create({
-    messages: [
-      {
-        role: "system",
-        content: `You are a ${language?(language + " "):""}programmer that replaces <FILL_ME> part with the right code. Only output the code that replaces <FILL_ME> part. Do not add any explanation or markdown.`,
-      },
-      { role: "user", content: `${prefix}<FILL_ME>${suffix}` },
-    ],
-    model,
-  });
-
-  return chatCompletion.choices[0].message.content;
-}
+import { completionLlama } from "@/lib/backends/llama";
+import { completionMixtralWithRetries } from "@/lib/backends/mistral";
+import { completionOpenAI } from "@/lib/backends/openai";
 
 export default async function handler(req, res) {
   const { prefix, suffix, model, language } = req.body;
-  const completionMethod = model == "codellama" ? completionLlama : (model==="mixtral-8x7b"? completionMixtralWithCleanup : completionOpenAI);
+  const completionMethod = model == "codellama" ? completionLlama : (model==="mixtral-8x7b"? completionMixtralWithRetries : completionOpenAI);
   const prediction = await completionMethod(prefix, suffix, model, language);
-  console.log(model, prediction)
-  res.status(200).json({ prediction })
+  console.log(model, prediction);
+  res.status(200).json({ prediction });
 }