From 91fa575b55aee18e72812c95f0493818b58d9301 Mon Sep 17 00:00:00 2001 From: Anajrim <115026399+Anajrim01@users.noreply.github.com> Date: Thu, 12 Sep 2024 03:54:49 +0200 Subject: [PATCH 1/5] update dep. and switch to faster tokenizer --- package-lock.json | 105 ++++++++------- package.json | 2 +- src/misc/openai.ts | 314 ++++++++++++++++++++++----------------------- 3 files changed, 214 insertions(+), 207 deletions(-) diff --git a/package-lock.json b/package-lock.json index d3502d7..8ebf8b3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@shipbit/slickgpt", - "version": "2.1.0", + "version": "2.2.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@shipbit/slickgpt", - "version": "2.1.0", + "version": "2.2.0", "license": "MIT", "dependencies": { "@azure/msal-browser": "^3.13.0", @@ -16,10 +16,10 @@ "@vercel/analytics": "^1.2.2", "common-tags": "^1.8.2", "firebase": "^10.11.0", - "gpt3-tokenizer": "^1.1.5", "highlight.js": "^11.9.0", "highlightjs-copy": "^1.0.5", "javascript-time-ago": "^2.5.10", + "js-tiktoken": "^1.0.14", "openai": "^4.38.2", "random-word-slugs": "^0.1.7", "sse.js": "^2.4.1", @@ -111,11 +111,11 @@ } }, "node_modules/@azure/msal-node": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/@azure/msal-node/-/msal-node-2.7.0.tgz", - "integrity": "sha512-wXD8LkUvHICeSWZydqg6o8Yvv+grlBEcmLGu+QEI4FcwFendbTEZrlSygnAXXSOCVaGAirWLchca35qrgpO6Jw==", + "version": "2.13.1", + "resolved": "https://registry.npmjs.org/@azure/msal-node/-/msal-node-2.13.1.tgz", + "integrity": "sha512-sijfzPNorKt6+9g1/miHwhj6Iapff4mPQx1azmmZExgzUROqWTM1o3ACyxDja0g47VpowFy/sxTM/WsuCyXTiw==", "dependencies": { - "@azure/msal-common": "14.9.0", + "@azure/msal-common": "14.14.2", "jsonwebtoken": "^9.0.0", "uuid": "^8.3.0" }, @@ -123,6 +123,14 @@ "node": ">=16" } }, + "node_modules/@azure/msal-node/node_modules/@azure/msal-common": { + "version": "14.14.2", + "resolved": "https://registry.npmjs.org/@azure/msal-common/-/msal-common-14.14.2.tgz", + "integrity": "sha512-XV0P5kSNwDwCA/SjIxTe9mEAsKB0NqGNSuaVrkCCE2lAyBr/D6YtD80Vkdp4tjWnPFwjzkwldjr1xU/facOJog==", + "engines": { + "node": ">=0.8.0" + } + }, "node_modules/@azure/msal-node/node_modules/uuid": { "version": "8.3.2", "resolved": "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz", @@ -1105,9 +1113,9 @@ "integrity": "sha512-9TANp6GPoMtYzQdt54kfAyMmz1+osLlXdg2ENroU7zzrtflTLrrC/lgrIfaSe+Wu0b89GKccT7vxXA0MoAIO+Q==" }, "node_modules/@grpc/grpc-js": { - "version": "1.9.14", - "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.9.14.tgz", - "integrity": "sha512-nOpuzZ2G3IuMFN+UPPpKrC6NsLmWsTqSsm66IRfnBt1D4pwTqE27lmbpcPM+l2Ua4gE7PfjRHI6uedAy7hoXUw==", + "version": "1.9.15", + "resolved": "https://registry.npmjs.org/@grpc/grpc-js/-/grpc-js-1.9.15.tgz", + "integrity": "sha512-nqE7Hc0AzI+euzUwDAy0aY5hCp10r734gMGRdU+qOPX0XSceI2ULrcXB5U2xSc5VkWwalCj4M7GzCAygZl2KoQ==", "dependencies": { "@grpc/proto-loader": "^0.7.8", "@types/node": ">=12.12.47" @@ -2294,11 +2302,6 @@ "dequal": "^2.0.3" } }, - "node_modules/array-keyed-map": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/array-keyed-map/-/array-keyed-map-2.1.3.tgz", - "integrity": "sha512-JIUwuFakO+jHjxyp4YgSiKXSZeC0U+R1jR94bXWBcVlFRBycqXlb+kH9JHxBGcxnVuSqx5bnn0Qz9xtSeKOjiA==" - }, "node_modules/array-union": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", @@ -2370,6 +2373,25 @@ "integrity": 
"sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", "dev": true }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ] + }, "node_modules/binary-extensions": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", @@ -2398,12 +2420,12 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -3377,9 +3399,9 @@ "dev": true }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "dependencies": { "to-regex-range": "^5.0.1" @@ -3710,17 +3732,6 @@ "integrity": "sha512-uHJgbwAMwNFf5mLst7IWLNg14x1CkeqglJb/K3doi4dw6q2IvAAmM/Y81kevy83wP+Sst+nutFTYOGg3d1lsxg==", "dev": true }, - "node_modules/gpt3-tokenizer": { - "version": "1.1.5", - "resolved": "https://registry.npmjs.org/gpt3-tokenizer/-/gpt3-tokenizer-1.1.5.tgz", - "integrity": "sha512-O9iCL8MqGR0Oe9wTh0YftzIbysypNQmS5a5JG3cB3M4LMYjlAVvNnf8LUzVY9MrI7tj+YLY356uHtO2lLX2HpA==", - "dependencies": { - "array-keyed-map": "^2.1.3" - }, - "engines": { - "node": ">=12" - } - }, "node_modules/graceful-fs": { "version": "4.2.11", "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.11.tgz", @@ -3992,6 +4003,14 @@ "jiti": "bin/jiti.js" } }, + "node_modules/js-tiktoken": { + "version": "1.0.14", + "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.14.tgz", + "integrity": "sha512-Pk3l3WOgM9joguZY2k52+jH82RtABRgB5RdGFZNUGbOKGMVlNmafcPA3b0ITcCZPu1L9UclP1tne6aw7ZI4Myg==", + "dependencies": { + "base64-js": "^1.5.1" + } + }, "node_modules/json-buffer": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", @@ -4290,12 +4309,12 @@ } }, "node_modules/micromatch": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", - "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", "dev": true, "dependencies": { - "braces": "^3.0.2", + "braces": "^3.0.3", 
"picomatch": "^2.3.1" }, "engines": { @@ -7904,9 +7923,9 @@ } }, "node_modules/svelte": { - "version": "4.2.15", - "resolved": "https://registry.npmjs.org/svelte/-/svelte-4.2.15.tgz", - "integrity": "sha512-j9KJSccHgLeRERPlhMKrCXpk2TqL2m5Z+k+OBTQhZOhIdCCd3WfqV+ylPWeipEwq17P/ekiSFWwrVQv93i3bsg==", + "version": "4.2.19", + "resolved": "https://registry.npmjs.org/svelte/-/svelte-4.2.19.tgz", + "integrity": "sha512-IY1rnGr6izd10B0A8LqsBfmlT5OILVuZ7XsI0vdGPEvuonFV7NYEUK4dAkm9Zg2q0Um92kYjTpS1CAP3Nh/KWw==", "dependencies": { "@ampproject/remapping": "^2.2.1", "@jridgewell/sourcemap-codec": "^1.4.15", @@ -8243,9 +8262,9 @@ } }, "node_modules/tar": { - "version": "6.2.0", - "resolved": "https://registry.npmjs.org/tar/-/tar-6.2.0.tgz", - "integrity": "sha512-/Wo7DcT0u5HUV486xg675HtjNd3BXZ6xDbzsCUZPt5iw8bTQ63bP0Raut3mvro9u+CUyq7YQd8Cx55fsZXxqLQ==", + "version": "6.2.1", + "resolved": "https://registry.npmjs.org/tar/-/tar-6.2.1.tgz", + "integrity": "sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==", "dev": true, "dependencies": { "chownr": "^2.0.0", diff --git a/package.json b/package.json index 04457e9..b55d73f 100644 --- a/package.json +++ b/package.json @@ -59,10 +59,10 @@ "@vercel/analytics": "^1.2.2", "common-tags": "^1.8.2", "firebase": "^10.11.0", - "gpt3-tokenizer": "^1.1.5", "highlight.js": "^11.9.0", "highlightjs-copy": "^1.0.5", "javascript-time-ago": "^2.5.10", + "js-tiktoken": "^1.0.14", "openai": "^4.38.2", "random-word-slugs": "^0.1.7", "sse.js": "^2.4.1", diff --git a/src/misc/openai.ts b/src/misc/openai.ts index a2ad536..2ee7e10 100644 --- a/src/misc/openai.ts +++ b/src/misc/openai.ts @@ -1,34 +1,31 @@ import type { Chat, ChatCost, ChatMessage } from './shared'; -import GPT3Tokenizer from 'gpt3-tokenizer'; +import { encodingForModel } from 'js-tiktoken'; import { ChatStorekeeper } from './chatStorekeeper'; // Initialization is slow, so only do it once. -// TypeScript misinterprets the export default class GPT3Tokenizer from gpt3-tokenizer -// and throws "TypeError: GPT3Tokenizer is not a constructor" if we try to call the ctor here. -// Therefore, we initialize the tokenizer in the first call to countTokens(). 
-let tokenizer: GPT3Tokenizer; +let tokenizer = encodingForModel('gpt-4o'); export enum AiProvider { - OpenAi = 'OpenAI', - Mistral = 'Mistral', - Meta = 'Meta' + OpenAi = 'OpenAI', + Mistral = 'Mistral', + Meta = 'Meta' } export enum AiModel { - Gpt35Turbo = 'gpt-3.5-turbo', - Gpt4o = 'gpt-4o', - Gpt4 = 'gpt-4', - Gpt432k = 'gpt-4-32k', - Gpt41106preview = 'gpt-4-1106-preview', - Gpt4Turbo = 'gpt-4-turbo', - Gpt4TurboPreview = 'gpt-4-turbo-preview', - MistralLarge = 'mistral-large-latest', - Llama38b = 'llama3-8b-8192', - Llama370b = 'llama3-70b-8192' + Gpt35Turbo = 'gpt-3.5-turbo', + Gpt4o = 'gpt-4o', + Gpt4 = 'gpt-4', + Gpt432k = 'gpt-4-32k', + Gpt41106preview = 'gpt-4-1106-preview', + Gpt4Turbo = 'gpt-4-turbo', + Gpt4TurboPreview = 'gpt-4-turbo-preview', + MistralLarge = 'mistral-large-latest', + Llama38b = 'llama3-8b-8192', + Llama370b = 'llama3-70b-8192' } export interface AiSettings { - model: AiModel; + model: AiModel; max_tokens: number; // just for completions temperature: number; // 0-2 top_p: number; // 0-1 @@ -36,104 +33,104 @@ export interface AiSettings { } export const defaultOpenAiSettings: AiSettings = { - model: AiModel.Gpt4o, + model: AiModel.Gpt4o, max_tokens: 4072, // Manually adjusted - temperature: 1, - top_p: 1 + temperature: 1, + top_p: 1 }; export interface AiModelStats { - provider: AiProvider; + provider: AiProvider; maxTokens: number; // The max tokens you allow GPT to respond with contextWindow: number; // The max tokens an AI model can handle. costInput: number; // $ per 1M tokens, see https://openai.com/pricing: costOutput: number; // $ per 1M tokens, see https://openai.com/pricing: middlewareDeploymentName?: string; // the "Azure" model - hidden?: boolean; + hidden?: boolean; } export const models: { [key in AiModel]: AiModelStats } = { - [AiModel.Gpt35Turbo]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 16384, - costInput: 0.5, - costOutput: 1.5, - middlewareDeploymentName: 'gpt-35-turbo' - }, - [AiModel.Gpt4Turbo]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 128000, - costInput: 10, - costOutput: 30, - middlewareDeploymentName: 'gpt-4-turbo' - }, - [AiModel.Gpt4o]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 128000, - costInput: 5, - costOutput: 15 - }, - [AiModel.MistralLarge]: { - provider: AiProvider.Mistral, - maxTokens: 4096, - contextWindow: 32768, - costInput: 8, - costOutput: 24, - middlewareDeploymentName: 'mistral-large' - }, - [AiModel.Llama38b]: { - provider: AiProvider.Meta, - maxTokens: 8192, - contextWindow: 8192, - costInput: 0.05, - costOutput: 0.1, - middlewareDeploymentName: 'llama3-8b' - }, - [AiModel.Llama370b]: { - provider: AiProvider.Meta, - maxTokens: 8192, - contextWindow: 8192, - costInput: 0.59, - costOutput: 0.79, - middlewareDeploymentName: 'llama3-70b' - }, - // deprecated, only here for backwards compatibility + [AiModel.Gpt35Turbo]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 16384, + costInput: 0.5, + costOutput: 1.5, + middlewareDeploymentName: 'gpt-35-turbo' + }, + [AiModel.Gpt4Turbo]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 128000, + costInput: 10, + costOutput: 30, + middlewareDeploymentName: 'gpt-4-turbo' + }, + [AiModel.Gpt4o]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 128000, + costInput: 5, + costOutput: 15 + }, + [AiModel.MistralLarge]: { + provider: AiProvider.Mistral, + maxTokens: 4096, + contextWindow: 32768, + costInput: 8, + costOutput: 24, + 
middlewareDeploymentName: 'mistral-large' + }, + [AiModel.Llama38b]: { + provider: AiProvider.Meta, + maxTokens: 8192, + contextWindow: 8192, + costInput: 0.05, + costOutput: 0.1, + middlewareDeploymentName: 'llama3-8b' + }, + [AiModel.Llama370b]: { + provider: AiProvider.Meta, + maxTokens: 8192, + contextWindow: 8192, + costInput: 0.59, + costOutput: 0.79, + middlewareDeploymentName: 'llama3-70b' + }, + // deprecated, only here for backwards compatibility [AiModel.Gpt4TurboPreview]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 128000, - costInput: 10, - costOutput: 30, - middlewareDeploymentName: 'gpt-4-turbo', - hidden: true - }, - [AiModel.Gpt41106preview]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 128000, - costInput: 10, - costOutput: 30, - hidden: true - }, - [AiModel.Gpt4]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 8192, - costInput: 30, - costOutput: 60, - hidden: true - }, - [AiModel.Gpt432k]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 32768, - costInput: 60, - costOutput: 120, - hidden: true - } + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 128000, + costInput: 10, + costOutput: 30, + middlewareDeploymentName: 'gpt-4-turbo', + hidden: true + }, + [AiModel.Gpt41106preview]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 128000, + costInput: 10, + costOutput: 30, + hidden: true + }, + [AiModel.Gpt4]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 8192, + costInput: 30, + costOutput: 60, + hidden: true + }, + [AiModel.Gpt432k]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 32768, + costInput: 60, + costOutput: 120, + hidden: true + } }; export const providers: AiProvider[] = [AiProvider.OpenAi, AiProvider.Mistral, AiProvider.Meta]; @@ -142,73 +139,64 @@ export const providers: AiProvider[] = [AiProvider.OpenAi, AiProvider.Mistral, A * see https://github.com/syonfox/GPT-3-Encoder/issues/2 */ export function countTokens(message: ChatMessage): number { - // see comment above - if (!tokenizer) { - tokenizer = new GPT3Tokenizer({ type: 'gpt3' }); - } - - let num_tokens = 4; // every message follows {role/name}\n{content}\n - for (const [key, value] of Object.entries(message)) { - if (key !== 'name' && key !== 'role' && key !== 'content') { - continue; - } - const encoded: { bpe: number[]; text: string[] } = tokenizer.encode(value); - num_tokens += encoded.text.length; - if (key === 'name') { - num_tokens--; // if there's a name, the role is omitted - } - } - - return num_tokens; + let num_tokens = 4; // every message follows {role/name}\n{content}\n + for (const [key, value] of Object.entries(message)) { + if (key !== 'name' && key !== 'role' && key !== 'content') { + continue; + } + const tokens: number[] = tokenizer.encode(value); + num_tokens += tokens.length; + if (key === 'name') { + num_tokens--; // if there's a name, the role is omitted + } + } + + return num_tokens; } -export function modelExists(modelName: OpenAiModel): boolean { - return modelName in models; +export function modelExists(modelName: AiModel): boolean { + return modelName in models; } export function estimateChatCost(chat: Chat): ChatCost { - let tokensPrompt = 0; - let tokensCompletion = 0; - - const messages = ChatStorekeeper.getCurrentMessageBranch(chat); - - for (const message of messages) { - if (message.role === 'assistant') { - tokensCompletion += countTokens(message); - } else { - tokensPrompt += countTokens(message); 
- } - } - - // see https://platform.openai.com/docs/guides/chat/introduction > Deep Dive Expander - const tokensTotal = tokensPrompt + tokensCompletion + 2; // every reply is primed with assistant - const { contextWindow, costInput, costOutput } = models[chat.settings.model]; - const costPrompt = (costInput / 1000000.0) * tokensPrompt; - const costCompletion = (costOutput / 1000000.0) * tokensCompletion; - - return { - tokensPrompt, - tokensCompletion, - tokensTotal: tokensTotal, - costPrompt, - costCompletion, - costTotal: costPrompt + costCompletion, - maxTokensForModel: contextWindow - }; + let tokensPrompt = 0; + let tokensCompletion = 0; + + const messages = ChatStorekeeper.getCurrentMessageBranch(chat); + + for (const message of messages) { + if (message.role === 'assistant') { + tokensCompletion += countTokens(message); + } else { + tokensPrompt += countTokens(message); + } + } + + const tokensTotal = tokensPrompt + tokensCompletion + 2; + const { contextWindow, costInput, costOutput } = models[chat.settings.model]; + const costPrompt = (costInput / 1000000.0) * tokensPrompt; + const costCompletion = (costOutput / 1000000.0) * tokensCompletion; + + return { + tokensPrompt, + tokensCompletion, + tokensTotal: tokensTotal, + costPrompt, + costCompletion, + costTotal: costPrompt + costCompletion, + maxTokensForModel: contextWindow + }; } export function getProviderForModel(model: AiModel) { - const result = AiProvider.OpenAi; - if (model) { - if (model.includes('llama')) { - return AiProvider.Meta; - } else if (model.includes('mistral')) { - return AiProvider.Mistral; - } - } - return result; + if (model.includes('llama')) { + return AiProvider.Meta; + } else if (model.includes('mistral')) { + return AiProvider.Mistral; + } + return AiProvider.OpenAi; } export function getDefaultModelForProvider(provider: AiProvider) { - return Object.keys(models).find((key) => models[key as AiModel].provider === provider) as AiModel; -} + return Object.keys(models).find((key) => models[key as AiModel].provider === provider) as AiModel; +} \ No newline at end of file From 9dea0c76e5a6b540f0fb726efeebe9e32ae62de2 Mon Sep 17 00:00:00 2001 From: Anajrim <115026399+Anajrim01@users.noreply.github.com> Date: Thu, 12 Sep 2024 04:00:45 +0200 Subject: [PATCH 2/5] fix indent --- src/misc/openai.ts | 295 +++++++++++++++++++++++---------------------- 1 file changed, 148 insertions(+), 147 deletions(-) diff --git a/src/misc/openai.ts b/src/misc/openai.ts index 2ee7e10..b46703e 100644 --- a/src/misc/openai.ts +++ b/src/misc/openai.ts @@ -5,27 +5,28 @@ import { ChatStorekeeper } from './chatStorekeeper'; // Initialization is slow, so only do it once. 
let tokenizer = encodingForModel('gpt-4o'); + export enum AiProvider { - OpenAi = 'OpenAI', - Mistral = 'Mistral', - Meta = 'Meta' + OpenAi = 'OpenAI', + Mistral = 'Mistral', + Meta = 'Meta' } export enum AiModel { - Gpt35Turbo = 'gpt-3.5-turbo', - Gpt4o = 'gpt-4o', - Gpt4 = 'gpt-4', - Gpt432k = 'gpt-4-32k', - Gpt41106preview = 'gpt-4-1106-preview', - Gpt4Turbo = 'gpt-4-turbo', - Gpt4TurboPreview = 'gpt-4-turbo-preview', - MistralLarge = 'mistral-large-latest', - Llama38b = 'llama3-8b-8192', - Llama370b = 'llama3-70b-8192' + Gpt35Turbo = 'gpt-3.5-turbo', + Gpt4o = 'gpt-4o', + Gpt4 = 'gpt-4', + Gpt432k = 'gpt-4-32k', + Gpt41106preview = 'gpt-4-1106-preview', + Gpt4Turbo = 'gpt-4-turbo', + Gpt4TurboPreview = 'gpt-4-turbo-preview', + MistralLarge = 'mistral-large-latest', + Llama38b = 'llama3-8b-8192', + Llama370b = 'llama3-70b-8192' } export interface AiSettings { - model: AiModel; + model: AiModel; max_tokens: number; // just for completions temperature: number; // 0-2 top_p: number; // 0-1 @@ -33,104 +34,104 @@ export interface AiSettings { } export const defaultOpenAiSettings: AiSettings = { - model: AiModel.Gpt4o, + model: AiModel.Gpt4o, max_tokens: 4072, // Manually adjusted - temperature: 1, - top_p: 1 + temperature: 1, + top_p: 1 }; export interface AiModelStats { - provider: AiProvider; + provider: AiProvider; maxTokens: number; // The max tokens you allow GPT to respond with contextWindow: number; // The max tokens an AI model can handle. costInput: number; // $ per 1M tokens, see https://openai.com/pricing: costOutput: number; // $ per 1M tokens, see https://openai.com/pricing: middlewareDeploymentName?: string; // the "Azure" model - hidden?: boolean; + hidden?: boolean; } export const models: { [key in AiModel]: AiModelStats } = { - [AiModel.Gpt35Turbo]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 16384, - costInput: 0.5, - costOutput: 1.5, - middlewareDeploymentName: 'gpt-35-turbo' - }, - [AiModel.Gpt4Turbo]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 128000, - costInput: 10, - costOutput: 30, - middlewareDeploymentName: 'gpt-4-turbo' - }, - [AiModel.Gpt4o]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 128000, - costInput: 5, - costOutput: 15 - }, - [AiModel.MistralLarge]: { - provider: AiProvider.Mistral, - maxTokens: 4096, - contextWindow: 32768, - costInput: 8, - costOutput: 24, - middlewareDeploymentName: 'mistral-large' - }, - [AiModel.Llama38b]: { - provider: AiProvider.Meta, - maxTokens: 8192, - contextWindow: 8192, - costInput: 0.05, - costOutput: 0.1, - middlewareDeploymentName: 'llama3-8b' - }, - [AiModel.Llama370b]: { - provider: AiProvider.Meta, - maxTokens: 8192, - contextWindow: 8192, - costInput: 0.59, - costOutput: 0.79, - middlewareDeploymentName: 'llama3-70b' - }, - // deprecated, only here for backwards compatibility + [AiModel.Gpt35Turbo]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 16384, + costInput: 0.5, + costOutput: 1.5, + middlewareDeploymentName: 'gpt-35-turbo' + }, + [AiModel.Gpt4Turbo]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 128000, + costInput: 10, + costOutput: 30, + middlewareDeploymentName: 'gpt-4-turbo' + }, + [AiModel.Gpt4o]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 128000, + costInput: 5, + costOutput: 15 + }, + [AiModel.MistralLarge]: { + provider: AiProvider.Mistral, + maxTokens: 4096, + contextWindow: 32768, + costInput: 8, + costOutput: 24, + middlewareDeploymentName: 'mistral-large' 
+ }, + [AiModel.Llama38b]: { + provider: AiProvider.Meta, + maxTokens: 8192, + contextWindow: 8192, + costInput: 0.05, + costOutput: 0.1, + middlewareDeploymentName: 'llama3-8b' + }, + [AiModel.Llama370b]: { + provider: AiProvider.Meta, + maxTokens: 8192, + contextWindow: 8192, + costInput: 0.59, + costOutput: 0.79, + middlewareDeploymentName: 'llama3-70b' + }, + // deprecated, only here for backwards compatibility [AiModel.Gpt4TurboPreview]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 128000, - costInput: 10, - costOutput: 30, - middlewareDeploymentName: 'gpt-4-turbo', - hidden: true - }, - [AiModel.Gpt41106preview]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 128000, - costInput: 10, - costOutput: 30, - hidden: true - }, - [AiModel.Gpt4]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 8192, - costInput: 30, - costOutput: 60, - hidden: true - }, - [AiModel.Gpt432k]: { - provider: AiProvider.OpenAi, - maxTokens: 4096, - contextWindow: 32768, - costInput: 60, - costOutput: 120, - hidden: true - } + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 128000, + costInput: 10, + costOutput: 30, + middlewareDeploymentName: 'gpt-4-turbo', + hidden: true + }, + [AiModel.Gpt41106preview]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 128000, + costInput: 10, + costOutput: 30, + hidden: true + }, + [AiModel.Gpt4]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 8192, + costInput: 30, + costOutput: 60, + hidden: true + }, + [AiModel.Gpt432k]: { + provider: AiProvider.OpenAi, + maxTokens: 4096, + contextWindow: 32768, + costInput: 60, + costOutput: 120, + hidden: true + } }; export const providers: AiProvider[] = [AiProvider.OpenAi, AiProvider.Mistral, AiProvider.Meta]; @@ -139,64 +140,64 @@ export const providers: AiProvider[] = [AiProvider.OpenAi, AiProvider.Mistral, A * see https://github.com/syonfox/GPT-3-Encoder/issues/2 */ export function countTokens(message: ChatMessage): number { - let num_tokens = 4; // every message follows {role/name}\n{content}\n - for (const [key, value] of Object.entries(message)) { - if (key !== 'name' && key !== 'role' && key !== 'content') { - continue; - } - const tokens: number[] = tokenizer.encode(value); - num_tokens += tokens.length; - if (key === 'name') { - num_tokens--; // if there's a name, the role is omitted - } - } - - return num_tokens; + let num_tokens = 4; // every message follows {role/name}\n{content}\n + for (const [key, value] of Object.entries(message)) { + if (key !== 'name' && key !== 'role' && key !== 'content') { + continue; + } + const tokens: number[] = tokenizer.encode(value); + num_tokens += tokens.length; + if (key === 'name') { + num_tokens--; // if there's a name, the role is omitted + } + } + + return num_tokens; } export function modelExists(modelName: AiModel): boolean { - return modelName in models; + return modelName in models; } export function estimateChatCost(chat: Chat): ChatCost { - let tokensPrompt = 0; - let tokensCompletion = 0; - - const messages = ChatStorekeeper.getCurrentMessageBranch(chat); - - for (const message of messages) { - if (message.role === 'assistant') { - tokensCompletion += countTokens(message); - } else { - tokensPrompt += countTokens(message); - } - } - - const tokensTotal = tokensPrompt + tokensCompletion + 2; - const { contextWindow, costInput, costOutput } = models[chat.settings.model]; - const costPrompt = (costInput / 1000000.0) * tokensPrompt; - const costCompletion = 
(costOutput / 1000000.0) * tokensCompletion; - - return { - tokensPrompt, - tokensCompletion, - tokensTotal: tokensTotal, - costPrompt, - costCompletion, - costTotal: costPrompt + costCompletion, - maxTokensForModel: contextWindow - }; + let tokensPrompt = 0; + let tokensCompletion = 0; + + const messages = ChatStorekeeper.getCurrentMessageBranch(chat); + + for (const message of messages) { + if (message.role === 'assistant') { + tokensCompletion += countTokens(message); + } else { + tokensPrompt += countTokens(message); + } + } + + const tokensTotal = tokensPrompt + tokensCompletion + 2; + const { contextWindow, costInput, costOutput } = models[chat.settings.model]; + const costPrompt = (costInput / 1000000.0) * tokensPrompt; + const costCompletion = (costOutput / 1000000.0) * tokensCompletion; + + return { + tokensPrompt, + tokensCompletion, + tokensTotal: tokensTotal, + costPrompt, + costCompletion, + costTotal: costPrompt + costCompletion, + maxTokensForModel: contextWindow + }; } export function getProviderForModel(model: AiModel) { - if (model.includes('llama')) { - return AiProvider.Meta; - } else if (model.includes('mistral')) { - return AiProvider.Mistral; - } - return AiProvider.OpenAi; + if (model.includes('llama')) { + return AiProvider.Meta; + } else if (model.includes('mistral')) { + return AiProvider.Mistral; + } + return AiProvider.OpenAi; } export function getDefaultModelForProvider(provider: AiProvider) { - return Object.keys(models).find((key) => models[key as AiModel].provider === provider) as AiModel; + return Object.keys(models).find((key) => models[key as AiModel].provider === provider) as AiModel; } \ No newline at end of file From b12a6c8f5a40f69c0b373d72d70c23eeeedd6123 Mon Sep 17 00:00:00 2001 From: Anajrim <115026399+Anajrim01@users.noreply.github.com> Date: Thu, 12 Sep 2024 04:03:09 +0200 Subject: [PATCH 3/5] revert undo comment --- src/misc/openai.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/misc/openai.ts b/src/misc/openai.ts index b46703e..b481e0c 100644 --- a/src/misc/openai.ts +++ b/src/misc/openai.ts @@ -173,7 +173,8 @@ export function estimateChatCost(chat: Chat): ChatCost { } } - const tokensTotal = tokensPrompt + tokensCompletion + 2; + // see https://platform.openai.com/docs/guides/chat/introduction > Deep Dive Expander + const tokensTotal = tokensPrompt + tokensCompletion + 2; // every reply is primed with assistant const { contextWindow, costInput, costOutput } = models[chat.settings.model]; const costPrompt = (costInput / 1000000.0) * tokensPrompt; const costCompletion = (costOutput / 1000000.0) * tokensCompletion; From f0a44cdf39b6cb582fda4de94c7d9426322bb483 Mon Sep 17 00:00:00 2001 From: Anajrim <115026399+Anajrim01@users.noreply.github.com> Date: Thu, 12 Sep 2024 14:49:13 +0200 Subject: [PATCH 4/5] refactor --- src/misc/openai.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/misc/openai.ts b/src/misc/openai.ts index b481e0c..4af49c9 100644 --- a/src/misc/openai.ts +++ b/src/misc/openai.ts @@ -3,8 +3,7 @@ import { encodingForModel } from 'js-tiktoken'; import { ChatStorekeeper } from './chatStorekeeper'; // Initialization is slow, so only do it once. 
-let tokenizer = encodingForModel('gpt-4o'); - +let tokenizer = encodingForModel('gpt-4-turbo'); export enum AiProvider { OpenAi = 'OpenAI', @@ -137,7 +136,6 @@ export const models: { [key in AiModel]: AiModelStats } = { export const providers: AiProvider[] = [AiProvider.OpenAi, AiProvider.Mistral, AiProvider.Meta]; /** * see https://platform.openai.com/docs/guides/chat/introduction > Deep Dive Expander - * see https://github.com/syonfox/GPT-3-Encoder/issues/2 */ export function countTokens(message: ChatMessage): number { let num_tokens = 4; // every message follows {role/name}\n{content}\n @@ -166,10 +164,11 @@ export function estimateChatCost(chat: Chat): ChatCost { const messages = ChatStorekeeper.getCurrentMessageBranch(chat); for (const message of messages) { + const tokens = countTokens(message); if (message.role === 'assistant') { - tokensCompletion += countTokens(message); + tokensCompletion += tokens; } else { - tokensPrompt += countTokens(message); + tokensPrompt += tokens; } } From edbe766ead05669d4224090ac0d6f7af9871e22f Mon Sep 17 00:00:00 2001 From: Anajrim <115026399+Anajrim01@users.noreply.github.com> Date: Fri, 13 Sep 2024 00:08:05 +0200 Subject: [PATCH 5/5] refactor code --- src/misc/openai.ts | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/misc/openai.ts b/src/misc/openai.ts index 4af49c9..1daaf47 100644 --- a/src/misc/openai.ts +++ b/src/misc/openai.ts @@ -3,7 +3,7 @@ import { encodingForModel } from 'js-tiktoken'; import { ChatStorekeeper } from './chatStorekeeper'; // Initialization is slow, so only do it once. -let tokenizer = encodingForModel('gpt-4-turbo'); +const tokenizer = encodingForModel('gpt-4-turbo'); export enum AiProvider { OpenAi = 'OpenAI', @@ -49,7 +49,7 @@ export interface AiModelStats { hidden?: boolean; } -export const models: { [key in AiModel]: AiModelStats } = { +export const models: Record = { [AiModel.Gpt35Turbo]: { provider: AiProvider.OpenAi, maxTokens: 4096, @@ -134,22 +134,18 @@ export const models: { [key in AiModel]: AiModelStats } = { }; export const providers: AiProvider[] = [AiProvider.OpenAi, AiProvider.Mistral, AiProvider.Meta]; + /** * see https://platform.openai.com/docs/guides/chat/introduction > Deep Dive Expander */ export function countTokens(message: ChatMessage): number { let num_tokens = 4; // every message follows {role/name}\n{content}\n for (const [key, value] of Object.entries(message)) { - if (key !== 'name' && key !== 'role' && key !== 'content') { - continue; - } - const tokens: number[] = tokenizer.encode(value); - num_tokens += tokens.length; - if (key === 'name') { - num_tokens--; // if there's a name, the role is omitted + if (key === 'name' || key === 'role' || key === 'content') { + const tokensCount = tokenizer.encode(value).length; + num_tokens += (key === 'name') ? 
tokensCount - 1 : tokensCount; } } - return num_tokens; } @@ -175,13 +171,13 @@ export function estimateChatCost(chat: Chat): ChatCost { // see https://platform.openai.com/docs/guides/chat/introduction > Deep Dive Expander const tokensTotal = tokensPrompt + tokensCompletion + 2; // every reply is primed with assistant const { contextWindow, costInput, costOutput } = models[chat.settings.model]; - const costPrompt = (costInput / 1000000.0) * tokensPrompt; - const costCompletion = (costOutput / 1000000.0) * tokensCompletion; + const costPrompt = (costInput / 1_000_000) * tokensPrompt; + const costCompletion = (costOutput / 1_000_000) * tokensCompletion; return { tokensPrompt, tokensCompletion, - tokensTotal: tokensTotal, + tokensTotal, costPrompt, costCompletion, costTotal: costPrompt + costCompletion, @@ -189,7 +185,7 @@ export function estimateChatCost(chat: Chat): ChatCost { }; } -export function getProviderForModel(model: AiModel) { +export function getProviderForModel(model: AiModel): AiProvider { if (model.includes('llama')) { return AiProvider.Meta; } else if (model.includes('mistral')) { @@ -198,6 +194,6 @@ export function getProviderForModel(model: AiModel) { return AiProvider.OpenAi; } -export function getDefaultModelForProvider(provider: AiProvider) { - return Object.keys(models).find((key) => models[key as AiModel].provider === provider) as AiModel; +export function getDefaultModelForProvider(provider: AiProvider): AiModel { + return (Object.keys(models) as AiModel[]).find(key => models[key].provider === provider)!; } \ No newline at end of file
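
For reference, a minimal usage sketch of the js-tiktoken-based token accounting these patches introduce. It is not part of the patch series; the Message shape and the estimateTokens helper are illustrative only, and the $5 per 1M input price is taken from the gpt-4o entry in the models table above.

import { encodingForModel } from 'js-tiktoken';

// Same encoder the patched src/misc/openai.ts initializes once at module load.
const tokenizer = encodingForModel('gpt-4-turbo');

// Illustrative message shape; the real app uses ChatMessage from './shared'.
interface Message {
	role: string;
	name?: string;
	content: string;
}

// Mirrors the patched countTokens(): 4 overhead tokens per message, minus 1
// when a name is present (the role is omitted then), plus 2 tokens that prime
// the assistant reply.
function estimateTokens(messages: Message[]): number {
	let total = 2;
	for (const message of messages) {
		total += 4;
		for (const [key, value] of Object.entries(message)) {
			if (key !== 'role' && key !== 'name' && key !== 'content') continue;
			if (typeof value !== 'string') continue;
			const count = tokenizer.encode(value).length;
			total += key === 'name' ? count - 1 : count;
		}
	}
	return total;
}

const promptTokens = estimateTokens([
	{ role: 'system', content: 'You are a helpful assistant.' },
	{ role: 'user', content: 'How many tokens is this?' }
]);

// costInput for gpt-4o in the models table is $5 per 1M tokens.
console.log(promptTokens, 'prompt tokens ≈ $', (5 / 1_000_000) * promptTokens);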