diff --git a/docs/AB_TESTING.md b/docs/AB_TESTING.md index 2f0766eb7..6f2cf9757 100644 --- a/docs/AB_TESTING.md +++ b/docs/AB_TESTING.md @@ -3,31 +3,23 @@ A/B testing in Codex shows two translation suggestions side‑by‑side once in a while so you can pick the better one. This helps us learn which retrieval and prompting strategies work best without slowing you down. ## How it works -- Triggering: Tests run at random with a small probability (default 15%). +- Triggering: Tests run at random with a hardcoded probability of 1% (1 in 100). This is defined by `AB_TEST_PROBABILITY` in `src/utils/abTestingRegistry.ts`. - Variants: When triggered, two candidates are generated in parallel. - Auto‑apply: If the two results are effectively identical, we apply one automatically and no modal is shown. - Choosing: If they differ, a simple chooser appears; click the option that reads best. Dismissing the modal after choosing just closes it. -- Frequency control: In the chooser, “See less/See more” nudges how often you’ll be asked in the future. ## What’s being compared - Search algorithm for few‑shot retrieval: `fts5-bm25` vs `sbs`. - Few‑shot example format: `source-and-target` vs `target-only`. (Model comparisons are disabled by default.) -## Settings -- `codex-editor-extension.abTestingEnabled`: turn A/B testing on/off. -- `codex-editor-extension.abTestingProbability`: probability (0–1) for running a true A/B test. Default: `0.15` (15%). - -Change these in VS Code Settings → Extensions → Codex Editor. - ## Results & privacy - Local log: Each choice is appended to `files/ab-test-results.jsonl` in your workspace (newline‑delimited JSON). - Win rates: The editor may compute simple win‑rates by variant label and show them in the chooser. - Network: If analytics posting is enabled in code, the extension may attempt to send anonymized A/B summaries to a configured endpoint. If your environment blocks network access, the extension continues without error. 
## Disable A/B testing -- Set `codex-editor-extension.abTestingEnabled` to `false`, or -- Set `codex-editor-extension.abTestingProbability` to `0`. +Set `AB_TEST_PROBABILITY` to `0` in `src/utils/abTestingRegistry.ts` and rebuild the extension; there is no longer a user-facing setting for this. ## Developer pointers (optional) - Registry and helpers: `src/utils/abTestingRegistry.ts`, `src/utils/abTestingSetup.ts`. diff --git a/package.json b/package.json index ccef3077b..e91b61fe2 100644 --- a/package.json +++ b/package.json @@ -931,20 +931,6 @@ "default": false, "description": "When enabled, AI will only use translation pairs that have been validated by users as examples for few-shot prompting. This ensures higher quality examples but may reduce the number of available examples." }, - "codex-editor-extension.abTestingEnabled": { - "title": "Enable A/B Testing", - "type": "boolean", - "default": true, - "description": "Enables lightweight A/B tests during completions. Tests are globally gated by probability and each comparison shows exactly two options." - }, - "codex-editor-extension.abTestingProbability": { - "title": "A/B Testing Probability", - "type": "number", - "default": 0.15, - "minimum": 0, - "maximum": 1, - "description": "Probability (0-1) that any eligible event will run a true A/B test. When not triggered, the system returns identical variants to keep UX consistent without doubling compute." 
- }, "codex-editor-extension.searchAlgorithm": { "title": "Search Algorithm", "type": "string", diff --git a/src/copilotSettings/copilotSettings.ts b/src/copilotSettings/copilotSettings.ts index 42aea35ce..168337706 100644 --- a/src/copilotSettings/copilotSettings.ts +++ b/src/copilotSettings/copilotSettings.ts @@ -340,7 +340,6 @@ export async function generateChatSystemMessage( numberOfFewShotExamples: 0, debugMode: false, useOnlyValidatedExamples: false, - abTestingEnabled: false, allowHtmlPredictions: allowHtmlPredictions, fewShotExampleFormat: "source-and-target", }; diff --git a/src/projectManager/utils/migrationUtils.ts b/src/projectManager/utils/migrationUtils.ts index 3beb532e3..577c342a2 100644 --- a/src/projectManager/utils/migrationUtils.ts +++ b/src/projectManager/utils/migrationUtils.ts @@ -1186,9 +1186,6 @@ export const migration_lineNumbersSettings = async (context?: vscode.ExtensionCo } }; -// Gently migrate A/B testing probability from older explicit 25% to 5% with user consent -// (removed) migration_abTestingProbabilityDefault — intentionally deleted for now - async function analyzeFileForLineNumbers(fileUri: vscode.Uri): Promise { try { // Read the file content using serializer for proper deserialization diff --git a/src/providers/translationSuggestions/llmCompletion.ts b/src/providers/translationSuggestions/llmCompletion.ts index 17215eb4f..b42227efd 100644 --- a/src/providers/translationSuggestions/llmCompletion.ts +++ b/src/providers/translationSuggestions/llmCompletion.ts @@ -7,7 +7,7 @@ import { CodexCellTypes } from "../../../types/enums"; import { getAutoCompleteStatusBarItem } from "../../extension"; import { tokenizeText } from "../../utils/nlpUtils"; import { buildFewShotExamplesText, buildMessages, fetchFewShotExamples, getPrecedingTranslationPairs } from "./shared"; -import { abTestingRegistry } from "../../utils/abTestingRegistry"; +import { abTestingRegistry, AB_TEST_PROBABILITY } from "../../utils/abTestingRegistry"; // Helper 
function to build A/B test context object function buildABTestContext( @@ -249,23 +249,21 @@ export async function llmCompletion( // A/B testing is disabled during batch operations (chapter autocomplete, batch transcription) // to avoid interrupting the user with variant selection UI const extConfig = vscode.workspace.getConfiguration("codex-editor-extension"); - const abEnabled = Boolean(extConfig.get("abTestingEnabled") ?? true) && !isBatchOperation; - const abProbabilityRaw = extConfig.get("abTestingProbability"); - const abProbability = Math.max(0, Math.min(1, typeof abProbabilityRaw === "number" ? abProbabilityRaw : 0.15)); + // A/B testing is always enabled but skipped during batch operations. + // Probability is hardcoded in AB_TEST_PROBABILITY (single source of truth). + const abEnabled = !isBatchOperation; const randomValue = Math.random(); - const triggerAB = abEnabled && randomValue < abProbability; + const triggerAB = abEnabled && randomValue < AB_TEST_PROBABILITY; if (completionConfig.debugMode) { - console.debug(`[llmCompletion] A/B testing: enabled=${abEnabled}, isBatchOperation=${isBatchOperation}, probability=${abProbability}, random=${randomValue.toFixed(3)}, trigger=${triggerAB}`); + console.debug(`[llmCompletion] A/B testing: enabled=${abEnabled}, isBatchOperation=${isBatchOperation}, probability=${AB_TEST_PROBABILITY}, random=${randomValue.toFixed(3)}, trigger=${triggerAB}`); } if (!triggerAB && completionConfig.debugMode) { if (isBatchOperation) { console.debug(`[llmCompletion] A/B testing disabled during batch operation`); - } else if (!abEnabled) { - console.debug(`[llmCompletion] A/B testing disabled in settings`); } else { - console.debug(`[llmCompletion] A/B test not triggered (random ${randomValue.toFixed(3)} >= probability ${abProbability})`); + console.debug(`[llmCompletion] A/B test not triggered (random ${randomValue.toFixed(3)} >= probability ${AB_TEST_PROBABILITY})`); } } diff --git a/src/test/suite/validatedOnlyExamples.test.ts 
b/src/test/suite/validatedOnlyExamples.test.ts index 1e78ef4b8..1633acbe6 100644 --- a/src/test/suite/validatedOnlyExamples.test.ts +++ b/src/test/suite/validatedOnlyExamples.test.ts @@ -117,8 +117,6 @@ suite("Validated-only examples behavior", () => { if (section === "codex-editor-extension") { return { get: (key: string) => { - if (key === "abTestingEnabled") return true; - if (key === "abTestingProbability") return 1; // force if (key === "useOnlyValidatedExamples") return true; if (key === "searchAlgorithm") return "sbs"; return (cfg as any)?.get?.(key); diff --git a/src/utils/abTestingRegistry.ts b/src/utils/abTestingRegistry.ts index 559cad989..3c441d593 100644 --- a/src/utils/abTestingRegistry.ts +++ b/src/utils/abTestingRegistry.ts @@ -1,3 +1,9 @@ +/** + * Probability (0–1) that any eligible completion triggers a local A/B test. + * 0.01 = 1 in 100. Change this single constant to adjust frequency everywhere. + */ +export const AB_TEST_PROBABILITY = 0.01; + type ABTestResultPayload = TVariant[] | { variants: TVariant[]; isAttentionCheck?: boolean; diff --git a/src/utils/llmUtils.ts b/src/utils/llmUtils.ts index 85fddca98..584d240f4 100644 --- a/src/utils/llmUtils.ts +++ b/src/utils/llmUtils.ts @@ -340,7 +340,6 @@ export interface CompletionConfig { numberOfFewShotExamples: number; debugMode: boolean; useOnlyValidatedExamples: boolean; - abTestingEnabled: boolean; // legacy flag; kept for type compatibility allowHtmlPredictions?: boolean; // whether to preserve HTML in examples and predictions fewShotExampleFormat: string; // format for few-shot examples: 'source-and-target' or 'target-only' } @@ -369,8 +368,6 @@ export async function fetchCompletionConfig(): Promise { numberOfFewShotExamples: (config.get("numberOfFewShotExamples") as number) || 30, debugMode: config.get("debugMode") === true || config.get("debugMode") === "true", useOnlyValidatedExamples: useOnlyValidatedExamples as boolean, - // A/B testing flag kept for compatibility; registry handles 
gating - abTestingEnabled: (config.get("abTestingEnabled") as boolean) ?? true, allowHtmlPredictions: (config.get("allowHtmlPredictions") as boolean) || false, fewShotExampleFormat: (config.get("fewShotExampleFormat") as string) || "source-and-target", }; diff --git a/types/index.d.ts b/types/index.d.ts index 676bed2f6..accc8bc3b 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -2163,8 +2163,7 @@ type EditorReceiveMessages = } | { type: "providerUpdatesTextDirection"; textDirection: "ltr" | "rtl"; } | { type: "providerSendsLLMCompletionResponse"; content: { completion: string; cellId: string; }; } - | { type: "providerSendsABTestVariants"; content: { variants: string[]; cellId: string; testId: string; testName?: string; names?: string[]; abProbability?: number; }; } - | { type: "abTestingProbabilityUpdated"; content: { value: number; }; } + | { type: "providerSendsABTestVariants"; content: { variants: string[]; cellId: string; testId: string; testName?: string; names?: string[]; }; } | { type: "jumpToSection"; content: string; } | { type: "providerUpdatesNotebookMetadataForWebview"; content: CustomNotebookMetadata; } | { type: "updateVideoUrlInWebview"; content: string; } diff --git a/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx b/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx index 66a9c80e9..6951e675a 100755 --- a/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx +++ b/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx @@ -355,7 +355,6 @@ const CodexCellEditor: React.FC = () => { testId: string; testName?: string; names?: string[]; - abProbability?: number; }>({ isActive: false, variants: [], @@ -1373,7 +1372,7 @@ const CodexCellEditor: React.FC = () => { }, setAudioAttachments: setAudioAttachments, showABTestVariants: (data) => { - const { variants, cellId, testId, testName, names, abProbability } = data as any; + const { variants, cellId, testId, testName, names } = data as any; const count = 
Array.isArray(variants) ? variants.length : 0; debug("ab-test", "Received A/B test variants:", { cellId, count }); @@ -1423,7 +1422,6 @@ const CodexCellEditor: React.FC = () => { testId, testName, names, - abProbability, }); return; }