From 9ee335821dd1bb003e736b47b1f19edca700be6c Mon Sep 17 00:00:00 2001 From: Asad Memon Date: Tue, 16 Jan 2024 18:46:14 -0800 Subject: [PATCH 1/4] demo: added mixtral as an option --- website/components/editor.jsx | 36 ++++++++++++++- website/package-lock.json | 14 ++++++ website/package.json | 1 + website/pages/api/autocomplete.js | 74 ++++++++++++++++++++++++++++++- 4 files changed, 123 insertions(+), 2 deletions(-) diff --git a/website/components/editor.jsx b/website/components/editor.jsx index 002c197..bcdb45f 100644 --- a/website/components/editor.jsx +++ b/website/components/editor.jsx @@ -19,9 +19,12 @@ const DEFAULTCODE = `function add(num1, num2){ function CodeEditor() { const [model, setModel] = useState("gpt-3.5-turbo-1106"); + const [delay, setDelay] = useState(500); const [acceptOnClick, setAcceptOnClick] = useState(true); return ( <> +
+ + + + +
=6" } }, + "node_modules/extract-json-from-string": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/extract-json-from-string/-/extract-json-from-string-1.0.1.tgz", + "integrity": "sha512-xfQOSFYbELVs9QVkKsV9FZAjlAmXQ2SLR6FpfFX1kpn4QAvaGBJlrnVOblMLwrLPYc26H+q9qxo6JTd4E7AwgQ==", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/fast-glob": { "version": "3.3.2", "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", @@ -3610,6 +3619,11 @@ "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==" }, + "extract-json-from-string": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/extract-json-from-string/-/extract-json-from-string-1.0.1.tgz", + "integrity": "sha512-xfQOSFYbELVs9QVkKsV9FZAjlAmXQ2SLR6FpfFX1kpn4QAvaGBJlrnVOblMLwrLPYc26H+q9qxo6JTd4E7AwgQ==" + }, "fast-glob": { "version": "3.3.2", "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", diff --git a/website/package.json b/website/package.json index ab6962d..a1179ed 100644 --- a/website/package.json +++ b/website/package.json @@ -20,6 +20,7 @@ "@uiw/react-codemirror": "^4.21.21", "class-variance-authority": "^0.7.0", "clsx": "^2.0.0", + "extract-json-from-string": "^1.0.1", "lucide-react": "^0.294.0", "next": "14.0.4", "openai": "^4.21.0", diff --git a/website/pages/api/autocomplete.js b/website/pages/api/autocomplete.js index 6223a84..14c76f7 100644 --- a/website/pages/api/autocomplete.js +++ b/website/pages/api/autocomplete.js @@ -1,9 +1,81 @@ import OpenAI from "openai"; +import extract from "extract-json-from-string"; const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, }); +async function completionMixtral(prefix, suffix, language) { + const response = await fetch( + `https://api.fireworks.ai/inference/v1/chat/completions`, { + method: 'POST', + headers: { + 'Content-Type': 
'application/json', + 'Accept': 'text/event-stream', + 'Authorization': `Bearer ${process.env.FIREWORKS_API_KEY}`, + }, + body: JSON.stringify({ + model: "accounts/fireworks/models/mixtral-8x7b-instruct", + n: 1, + messages: [ + { + role: "user", + content: "You are a " + (language || "") +" programmer that replaces part with the right code. Only output the code that replaces part. Do not add any explanation or markdown. Output JSON in this structure: {\"r\": \"...\"}\n```\n" + prefix + "" + suffix + "\n```", + }, + ], + stop: [ + "<|im_start|>", + "<|im_end|>", + "<|endoftext|>" + ], + top_p: 1, + top_k: 40, + presence_penalty: 0, + frequency_penalty: 0, + prompt_truncate_len: 1024, + context_length_exceeded_behavior: "truncate", + temperature: 0.9, + max_tokens: 50 + }), + }); + + const wholeOutput = await response.json(); + const outputJsonRaw = wholeOutput?.choices[0]?.message?.content; + try { + return extract(outputJsonRaw)[0].r; + } + catch (e) { + return ""; + } +} + +function completionLlama(prefix, suffix, language) { + const url = "https://api.fireworks.ai/inference/v1/completions"; + const apiKey = process.env.FIREWORKS_API_KEY; + const headers = { + 'Authorization': `Bearer ${apiKey}`, + 'Content-Type': 'application/json' + }; + + const body = { + "model": "accounts/fireworks/models/llama-v2-7b", + "prompt": `You are a javascript programmer that replaces part with the right code. Only output the code that replaces part. Do not add any explanation or markdown. 
Output JSON in this structure: {r: ""} + \`\`\` + ${prefix}${suffix} + \`\`\`` + }; + + return fetch(url, { + method: 'POST', + headers: headers, + body: JSON.stringify(body) + }) + .then(response => response.json()) + .catch(error => { + console.error('Error:', error); + }); +} + async function completionLlama(prefix, suffix, language){ try { const response = await fetch( @@ -40,7 +112,7 @@ async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", lang export default async function handler(req, res) { const { prefix, suffix, model, language } = req.body; - const completionMethod = model == "codellama" ? completionLlama : completionOpenAI; + const completionMethod = model == "codellama" ? completionLlama : (model==="mixtral-8x7b"? completionMixtral : completionOpenAI); const prediction = await completionMethod(prefix, suffix, model, language); console.log(model, prediction) res.status(200).json({ prediction }) From 16320fd43630f9186e4fed5b3fbea32fdd08b94c Mon Sep 17 00:00:00 2001 From: Asad Memon Date: Tue, 16 Jan 2024 18:49:47 -0800 Subject: [PATCH 2/4] typo --- website/pages/api/autocomplete.js | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/website/pages/api/autocomplete.js b/website/pages/api/autocomplete.js index 14c76f7..07ba7d5 100644 --- a/website/pages/api/autocomplete.js +++ b/website/pages/api/autocomplete.js @@ -49,33 +49,6 @@ async function completionMixtral(prefix, suffix, language) { } } -function completionLlama(prefix, suffix, language) { - const url = "https://api.fireworks.ai/inference/v1/completions"; - const apiKey = process.env.FIREWORKS_API_KEY; - const headers = { - 'Authorization': `Bearer ${apiKey}`, - 'Content-Type': 'application/json' - }; - - const body = { - "model": "accounts/fireworks/models/llama-v2-7b", - "prompt": `You are a javascript programmer that replaces part with the right code. Only output the code that replaces part. Do not add any explanation or markdown. 
Output JSON in this structure: {r: ""} - \`\`\` - ${prefix}${suffix} - \`\`\`` - }; - - return fetch(url, { - method: 'POST', - headers: headers, - body: JSON.stringify(body) - }) - .then(response => response.json()) - .catch(error => { - console.error('Error:', error); - }); -} - async function completionLlama(prefix, suffix, language){ try { const response = await fetch( From fc1fff0c6174b3de34b3e88fd17b5cb54ef7eb19 Mon Sep 17 00:00:00 2001 From: Asad Memon Date: Tue, 16 Jan 2024 20:38:47 -0800 Subject: [PATCH 3/4] json with cleanup and retry --- website/pages/api/autocomplete.js | 68 ++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/website/pages/api/autocomplete.js b/website/pages/api/autocomplete.js index 07ba7d5..9f3bbbc 100644 --- a/website/pages/api/autocomplete.js +++ b/website/pages/api/autocomplete.js @@ -5,7 +5,56 @@ const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, }); -async function completionMixtral(prefix, suffix, language) { +function removeOverlapPrefixSuffix(text, prefix, suffix) { + // Remove overlapping part from the start (prefix) + let commonPrefixLength = 0; + for (let i = 0; i < prefix.length; i++) { + if (text.startsWith(prefix.slice(i))) { + commonPrefixLength = prefix.length - i; + break; + } + } + if (commonPrefixLength > 0) { + text = text.slice(commonPrefixLength); + } + + // Remove overlapping part from the end (suffix) + let commonSuffixLength = 0; + for (let i = 0; i < suffix.length; i++) { + if (text.endsWith(suffix.substring(0, i + 1))) { + commonSuffixLength = i + 1; + break; + } + } + if (commonSuffixLength > 0) { + text = text.slice(0, -commonSuffixLength); + } + + return text.trim(); +} + +async function completionMixtralWithCleanup(prefix, suffix, language){ + const text = await completionMixtral(prefix, suffix, language); + return removeOverlapPrefixSuffix(text, prefix, suffix); +} + +async function completionMixtral(prefix, suffix, language, previousOutput) { + 
let messages = [ + { + role: "user", + content: "You are a " + (language || "") +" programmer that replaces part with the right code. Only output the code that replaces part. Do not add any explanation or markdown. ```\n" + prefix + "" + suffix + "\n```" + "\nOutput: JSON in this structure: {\"r\": \"...\"}\n", + }, + ]; + if (previousOutput) { + messages.push({ + role: "assistant", + content: previousOutput, + }); + messages.push({ + role: "user", + content: "The previous output was not formatted correctly. Please try again. Output should be JSON in this structure: {\"r\": \"...\"}", + }); + } const response = await fetch( `https://api.fireworks.ai/inference/v1/chat/completions`, { method: 'POST', @@ -17,24 +66,19 @@ async function completionMixtral(prefix, suffix, language) { body: JSON.stringify({ model: "accounts/fireworks/models/mixtral-8x7b-instruct", n: 1, - messages: [ - { - role: "user", - content: "You are a " + (language || "") +" programmer that replaces part with the right code. Only output the code that replaces part. Do not add any explanation or markdown. 
Output JSON in this structure: {\"r\": \"...\"}\n```\n" + prefix + "" + suffix + "\n```", - }, - ], + messages: messages, stop: [ "<|im_start|>", "<|im_end|>", "<|endoftext|>" ], top_p: 1, - top_k: 40, + top_k: 30, presence_penalty: 0, frequency_penalty: 0, prompt_truncate_len: 1024, context_length_exceeded_behavior: "truncate", - temperature: 0.9, + temperature: 0.8, max_tokens: 50 }), }); @@ -45,6 +89,10 @@ async function completionMixtral(prefix, suffix, language) { return extract(outputJsonRaw)[0].r; } catch (e) { + console.log(e, outputJsonRaw); + if (!previousOutput) { + return await completionMixtral(prefix, suffix, language, outputJsonRaw); + } return ""; } } @@ -85,7 +133,7 @@ async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", lang export default async function handler(req, res) { const { prefix, suffix, model, language } = req.body; - const completionMethod = model == "codellama" ? completionLlama : (model==="mixtral-8x7b"? completionMixtral : completionOpenAI); + const completionMethod = model == "codellama" ? completionLlama : (model==="mixtral-8x7b"? 
completionMixtralWithCleanup : completionOpenAI); const prediction = await completionMethod(prefix, suffix, model, language); console.log(model, prediction) res.status(200).json({ prediction }) From 5a22aa0d28eec66618ae298b380cf4792425e112 Mon Sep 17 00:00:00 2001 From: Asad Memon Date: Tue, 16 Jan 2024 22:29:10 -0800 Subject: [PATCH 4/4] fixes to mistral, reorg --- website/lib/backends/llama.js | 19 ++++ website/lib/backends/mistral.js | 122 ++++++++++++++++++++++++++ website/lib/backends/openai.js | 20 +++++ website/pages/api/autocomplete.js | 141 ++---------------------------- 4 files changed, 167 insertions(+), 135 deletions(-) create mode 100644 website/lib/backends/llama.js create mode 100644 website/lib/backends/mistral.js create mode 100644 website/lib/backends/openai.js diff --git a/website/lib/backends/llama.js b/website/lib/backends/llama.js new file mode 100644 index 0000000..263b9d6 --- /dev/null +++ b/website/lib/backends/llama.js @@ -0,0 +1,19 @@ + +export async function completionLlama(prefix, suffix, language){ + try { + const response = await fetch( + `https://api.cloudflare.com/client/v4/accounts/${process.env.CLOUDLFARE_ID}/ai/run/@hf/thebloke/codellama-7b-instruct-awq`, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${process.env.CLOUDFLARE_KEY}`, + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ "prompt": `You are a ${language?(language + " "):""}programmer. Do not add any explanation or markdown.
${prefix}${suffix}`, "max_tokens": 30 })
+    });
+    
+    const data = await response.json();
+    return data.result.response;
+  } catch (error) {
+    console.error('Error:', error);
+  }
+}
\ No newline at end of file
diff --git a/website/lib/backends/mistral.js b/website/lib/backends/mistral.js
new file mode 100644
index 0000000..0133b77
--- /dev/null
+++ b/website/lib/backends/mistral.js
@@ -0,0 +1,122 @@
+
+/**
+ * Pulls the contents of every fenced code block out of a markdown string.
+ * The opening fence's info string (e.g. "js", "c++", "TypeScript") is dropped
+ * only when a newline follows it, so one-line fences keep their code intact.
+ *
+ * @param {string} markdownText - Raw markdown to scan.
+ * @returns {string[]} Trimmed body of each block, or [markdownText] unchanged when there is no fence.
+ */
+function extractCodeSegments(markdownText) {
+  // Lazy match so each opening fence pairs with the nearest closing fence.
+  const matches = markdownText.match(/```[\s\S]*?```/g);
+  if (!matches) return [markdownText];
+  // Every match starts and ends with a fence: cut both, then the info line.
+  return matches.map((block) => block.slice(3, -3).replace(/^[\w+#.-]*\s*?\n/, "").trim());
+}
+/**
+ * Strips the echoed prefix and suffix from a model completion so that only
+ * the newly generated middle part remains.
+ *
+ * The model may echo just the tail of the prefix or just the head of the
+ * suffix, so the longest such overlap is removed on each side.
+ *
+ * @param {string} text - Completion text, expected to be wrapped in prefix/suffix.
+ * @param {string} prefix - Code before the insertion point.
+ * @param {string} suffix - Code after the insertion point.
+ * @returns {string} The text with the overlapping prefix and suffix removed.
+ * @throws {Error} "prefix not found" / "suffix not found" when a non-empty
+ *   prefix or suffix has no overlap with the text.
+ */
+function removeOverlapPrefixSuffix(text, prefix, suffix) {
+  // Longest tail of the prefix that the text starts with (0 if none).
+  let prefixOverlap = 0;
+  for (let start = 0; start < prefix.length && prefixOverlap === 0; start++) {
+    if (text.startsWith(prefix.slice(start))) prefixOverlap = prefix.length - start;
+  }
+  // An empty prefix (cursor at the very start) is trivially satisfied.
+  if (prefix.length > 0 && prefixOverlap === 0) throw new Error("prefix not found");
+  const withoutPrefix = text.slice(prefixOverlap);
+
+  // Longest head of the suffix that the remaining text ends with. Scanning
+  // from long to short matters: a suffix such as "}\n}" also ends with its own
+  // first character, so a shortest-first scan would strip only that one "}".
+  let suffixOverlap = suffix.length;
+  while (suffixOverlap > 0 && !withoutPrefix.endsWith(suffix.slice(0, suffixOverlap))) suffixOverlap--;
+  // An empty suffix (cursor at the very end) is trivially satisfied.
+  if (suffix.length > 0 && suffixOverlap === 0) throw new Error("suffix not found");
+  return withoutPrefix.slice(0, withoutPrefix.length - suffixOverlap);
+}
+
+async function completionMixtral(prefix, suffix, language, previousOutput) {
+  let messages = [
+      {
+          role: "user",
+          content: "You are a " + (language || "") +" programmer that replaces  part with the right code. ALWAYS INCLUDE PREFIX AND SUFFIX in the completed code.\n Do not format code, leave prefix and suffix as-is, only replace  part, do not include any code comments. ```\n" + prefix + "" + suffix + "\n```" + "\nPut output in markdown\n",
+      },
+  ];
+  if (previousOutput) {
+    messages.push({
+      role: "assistant",
+      content: previousOutput,
+    });
+    messages.push({
+      role: "user",
+      content: "The previous output was not formatted correctly. Please try again. Output should be in markdown code block and should include prefix and suffix.",
+    });
+  }
+  const response = await fetch(
+    `https://api.fireworks.ai/inference/v1/chat/completions`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+      'Accept': 'text/event-stream',
+      'Authorization': `Bearer ${process.env.FIREWORKS_API_KEY}`,
+    },
+    body: JSON.stringify({
+      model: "accounts/fireworks/models/mixtral-8x7b-instruct",
+      n: 1,
+      messages: messages,
+      stop: [
+          "<|im_start|>",
+          "<|im_end|>",
+          "<|endoftext|>"
+      ],
+      top_p: 1,
+      top_k: 40,
+      presence_penalty: 0,
+      frequency_penalty: 0,
+      prompt_truncate_len: 1024,
+      context_length_exceeded_behavior: "truncate",
+      temperature: 0.9,
+      max_tokens: 150
+    }),
+  });
+
+  const wholeOutput = await response.json();
+  const outputRaw = wholeOutput?.choices[0]?.message?.content;
+
+  try {
+    // extract markdown code part
+    const codeItself = extractCodeSegments(outputRaw)[0];
+    // check if  is still there
+    if (codeItself.includes("0){
+    const output = await completionMixtral(prefix, suffix, language);
+    if (output) return output;
+  }
+  return "";
+}
diff --git a/website/lib/backends/openai.js b/website/lib/backends/openai.js
new file mode 100644
index 0000000..c2cd053
--- /dev/null
+++ b/website/lib/backends/openai.js
@@ -0,0 +1,20 @@
+import OpenAI from "openai";
+
+const openai = new OpenAI({
+  apiKey: process.env.OPENAI_API_KEY,
+});
+
+/** Requests a completion for the given prefix/suffix from an OpenAI chat model and returns its text. */
+export async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", language){
+  // Optional language qualifier for the system prompt, including its trailing space.
+  const languageLabel = language ? (language + " ") : "";
+  // NOTE(review): the prompt reads "replaces  part" - a placeholder token looks to be missing; confirm.
+  const systemMessage = {
+    role: "system",
+    content: `You are a ${languageLabel}programmer that replaces  part with the right code. Only output the code that replaces  part. Do not add any explanation or markdown.`,
+  };
+  const userMessage = { role: "user", content: `${prefix}${suffix}` };
+  const { choices } = await openai.chat.completions.create({ messages: [systemMessage, userMessage], model });
+  // Only the first choice is used.
+  return choices[0].message.content;
+}
\ No newline at end of file
diff --git a/website/pages/api/autocomplete.js b/website/pages/api/autocomplete.js
index 9f3bbbc..e71afa8 100644
--- a/website/pages/api/autocomplete.js
+++ b/website/pages/api/autocomplete.js
@@ -1,140 +1,11 @@
-import OpenAI from "openai";
-import extract from "extract-json-from-string";
-
-const openai = new OpenAI({
-  apiKey: process.env.OPENAI_API_KEY,
-});
-
-function removeOverlapPrefixSuffix(text, prefix, suffix) {
-  // Remove overlapping part from the start (prefix)
-  let commonPrefixLength = 0;
-  for (let i = 0; i < prefix.length; i++) {
-      if (text.startsWith(prefix.slice(i))) {
-          commonPrefixLength = prefix.length - i;
-          break;
-      }
-  }
-  if (commonPrefixLength > 0) {
-      text = text.slice(commonPrefixLength);
-  }
-
-  // Remove overlapping part from the end (suffix)
-  let commonSuffixLength = 0;
-  for (let i = 0; i < suffix.length; i++) {
-      if (text.endsWith(suffix.substring(0, i + 1))) {
-          commonSuffixLength = i + 1;
-          break;
-      }
-  }
-  if (commonSuffixLength > 0) {
-      text = text.slice(0, -commonSuffixLength);
-  }
-
-  return text.trim();
-}
-
-async function completionMixtralWithCleanup(prefix, suffix, language){
-  const text = await completionMixtral(prefix, suffix, language);
-  return removeOverlapPrefixSuffix(text, prefix, suffix);
-}
-
-async function completionMixtral(prefix, suffix, language, previousOutput) {
-  let messages = [
-      {
-          role: "user",
-          content: "You are a " + (language || "") +" programmer that replaces  part with the right code. Only output the code that replaces  part. Do not add any explanation or markdown. ```\n" + prefix + "" + suffix + "\n```" + "\nOutput: JSON in this structure: {\"r\": \"...\"}\n",
-      },
-  ];
-  if (previousOutput) {
-    messages.push({
-      role: "assistant",
-      content: previousOutput,
-    });
-    messages.push({
-      role: "user",
-      content: "The previous output was not formatted correctly. Please try again. Output should be JSON in this structure: {\"r\": \"...\"}",
-    });
-  }
-  const response = await fetch(
-    `https://api.fireworks.ai/inference/v1/chat/completions`, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-      'Accept': 'text/event-stream',
-      'Authorization': `Bearer ${process.env.FIREWORKS_API_KEY}`,
-    },
-    body: JSON.stringify({
-      model: "accounts/fireworks/models/mixtral-8x7b-instruct",
-      n: 1,
-      messages: messages,
-      stop: [
-          "<|im_start|>",
-          "<|im_end|>",
-          "<|endoftext|>"
-      ],
-      top_p: 1,
-      top_k: 30,
-      presence_penalty: 0,
-      frequency_penalty: 0,
-      prompt_truncate_len: 1024,
-      context_length_exceeded_behavior: "truncate",
-      temperature: 0.8,
-      max_tokens: 50
-    }),
-  });
-
-  const wholeOutput = await response.json();
-  const outputJsonRaw = wholeOutput?.choices[0]?.message?.content;
-  try {
-    return extract(outputJsonRaw)[0].r;
-  }
-  catch (e) {
-    console.log(e, outputJsonRaw);
-    if (!previousOutput) {
-      return await completionMixtral(prefix, suffix, language, outputJsonRaw);
-    }
-    return "";
-  }
-}
-
-async function completionLlama(prefix, suffix, language){
-  try {
-    const response = await fetch(
-      `https://api.cloudflare.com/client/v4/accounts/${process.env.CLOUDLFARE_ID}/ai/run/@hf/thebloke/codellama-7b-instruct-awq`, {
-      method: 'POST',
-      headers: {
-        'Authorization': `Bearer ${process.env.CLOUDFLARE_KEY}`,
-        'Content-Type': 'application/json'
-      },
-      body: JSON.stringify({ "prompt": `You are a ${language?(language + " "):""}programmer. Do not add any explanation or markdown. 
${prefix}${suffix}`, "max_tokens": 30 })
-    });
-    
-    const data = await response.json();
-    return data.result.response;
-  } catch (error) {
-    console.error('Error:', error);
-  }
-}
-
-async function completionOpenAI(prefix, suffix, model="gpt-3.5-turbo-1106", language){
-  const chatCompletion = await openai.chat.completions.create({
-    messages: [
-      {
-        role: "system",
-        content: `You are a ${language?(language + " "):""}programmer that replaces  part with the right code. Only output the code that replaces  part. Do not add any explanation or markdown.`,
-      },
-      { role: "user", content: `${prefix}${suffix}` },
-    ],
-    model,
-  });
-
-  return chatCompletion.choices[0].message.content;
-}
+import { completionLlama } from "@/lib/backends/llama";
+import { completionMixtralWithRetries } from "@/lib/backends/mistral";
+import { completionOpenAI } from "@/lib/backends/openai";
 
 export default async function handler(req, res) {
   const { prefix, suffix, model, language } = req.body;
-  const completionMethod = model == "codellama" ? completionLlama : (model==="mixtral-8x7b"? completionMixtralWithCleanup : completionOpenAI);
-  const prediction = await completionMethod(prefix, suffix, model, language);
-  console.log(model, prediction)
-  res.status(200).json({ prediction })
+  // Only completionOpenAI takes the model name; the other backends expect language third (NOTE(review): confirm for Mixtral).
+  const prediction = await (model === "codellama" ? completionLlama(prefix, suffix, language) : (model === "mixtral-8x7b" ? completionMixtralWithRetries(prefix, suffix, language) : completionOpenAI(prefix, suffix, model, language)));
+  console.log(model, prediction);
+  res.status(200).json({ prediction });
 }