diff --git a/src/copilotSettings/copilotSettings.ts b/src/copilotSettings/copilotSettings.ts index 42aea35ce..b97124266 100644 --- a/src/copilotSettings/copilotSettings.ts +++ b/src/copilotSettings/copilotSettings.ts @@ -351,7 +351,7 @@ export async function generateChatSystemMessage( const prompt = `Generate a concise, one-paragraph set of linguistic instructions critical for a linguistically informed translator to keep in mind at all times when translating from ${sourceLanguage.refName} to ${targetLanguage.refName}. Keep it to a single plaintext paragraph. Note key lexicosemantic, information structuring, register-relevant and other key distinctions necessary for grammatical, natural text in ${targetLanguage.refName} if the starting place is ${sourceLanguage.refName}. ${htmlInstruction} Preserve original line breaks from by returning text with the same number of lines separated by newline characters. Do not include XML in your answer.`; - const response = await callLLM( + const result = await callLLM( [ { role: "user", @@ -361,7 +361,7 @@ export async function generateChatSystemMessage( llmConfig ); - return response; + return result.text; } catch (error) { debug("[generateChatSystemMessage] Error generating message:", error); return null; diff --git a/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts b/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts index 1dd4f1e6c..1ce4a9fde 100644 --- a/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts +++ b/src/providers/codexCellEditorProvider/codexCellEditorMessagehandling.ts @@ -836,6 +836,15 @@ const messageHandlers: Record Promise; debug("llmCompletion message received", { event, document, provider, webviewPanel }); + // Fire-and-forget: record single-cell translation telemetry + import("../../utils/abTestingAnalytics").then(({ recordAbResult }) => + recordAbResult({ + category: "batch_vs_single", + options: ["single", "batch"], + winner: 0, + }) + ).catch(() => { /* analytics must never block translation */ }); + const cellId = typedEvent.content.currentLineId; const addContentToValue = typedEvent.content.addContentToValue; @@ -1442,10 +1451,19 @@ const messageHandlers: Record Promise { const typedEvent = event as Extract; - const { cellId, selectedIndex, selectedContent, testId, testName, selectionTimeMs, variants } = typedEvent.content || {}; - const variantNames: string[] | undefined = variants; + const { cellId, selectedIndex, selectedContent, testId, testName, variants, models } = typedEvent.content || {}; const isRecovery = testName === "Recovery" || (typeof testId === "string" && testId.includes("-recovery-")); + // Decrement pending A/B test count so normal source highlighting can resume + if (provider.pendingABTestCount > 0) { + provider.pendingABTestCount--; + } + + // For model comparison tests, use model names as the analytics options; + // otherwise fall back to the variant text (existing behavior). + const isModelComparison = testName === "model_comparison" && Array.isArray(models) && models.length > 0; + const variantNames: string[] | undefined = isModelComparison ? models : variants; + // Check if this was a pending attention check const attentionCheck = getAttentionCheck(testId); @@ -1459,7 +1477,6 @@ const messageHandlers: Record Promise Promise Promise Promise { diff --git a/src/providers/codexCellEditorProvider/codexCellEditorProvider.ts b/src/providers/codexCellEditorProvider/codexCellEditorProvider.ts index 52f625eba..7bf7d8673 100755 --- a/src/providers/codexCellEditorProvider/codexCellEditorProvider.ts +++ b/src/providers/codexCellEditorProvider/codexCellEditorProvider.ts @@ -2,7 +2,7 @@ import * as vscode from "vscode"; import { fetchCompletionConfig } from "@/utils/llmUtils"; import { CodexNotebookReader } from "../../serializer"; import { workspaceStoreListener } from "../../utils/workspaceEventListener"; -import { llmCompletion } from "../translationSuggestions/llmCompletion"; +import { llmCompletion, LLMCompletionResult } from "../translationSuggestions/llmCompletion"; import { CodexCellTypes, EditType } from "../../../types/enums"; import { QuillCellContent, @@ -143,6 +143,10 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider 0, A/B tests are awaiting user selection — source highlighting + // is driven by the webview's A/B queue instead of normal cell navigation. + public pendingABTestCount: number = 0; + // New state for autocompletion process public autocompletionState: { isProcessing: boolean; @@ -335,6 +339,15 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider 0 && + this.autocompletionState.isProcessing; + if (suppressSourceHighlight) { + debug("Suppressing source highlight during A/B test queue"); + return; + } debug("Processing codex file highlight"); // Send highlight using cellId (primary) or globalReferences (if available) for (const [panelUri, panel] of this.webviewPanels.entries()) { @@ -1641,6 +1654,15 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider + recordAbResult({ + category: "batch_vs_single", + options: ["single", "batch"], + winner: 1, + }) + ).catch(() => { /* analytics must never block translation */ }); + // Determine if LLM is ready (API key or auth token). We still run transcriptions even if not ready. let llmReady = true; try { @@ -2541,10 +2563,19 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider 0 && this.autocompletionState.isProcessing; + for (const [panelUri, panel] of this.webviewPanels.entries()) { const isSourceFile = this.isSourceText(panelUri); // copy this to update target with merged cells if (isSourceFile) { + if (suppressSourceHighlight) { + continue; + } + // Check if this is the matching source file const isMatchingSource = sourceUri && panelUri === sourceUri.toString(); @@ -3386,7 +3417,7 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider 1); @@ -3407,8 +3438,8 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider 1) { - const { variants, testId, testName, isAttentionCheck, correctIndex, decoyCellId } = completionResult as any; + if (completionResult && Array.isArray(completionResult.variants) && completionResult.variants.length > 1) { + const { variants, testId, testName, isAttentionCheck, correctIndex, decoyCellId, models } = completionResult; // If variants are identical (ignoring whitespace), treat as single completion try { @@ -3432,21 +3463,21 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider 0 ? { models } : {}), }, }); + this.pendingABTestCount++; } - // Mark single cell translation as complete so UI progress/spinners stop - this.updateSingleCellTranslation(1.0); + if (isBatchOperation) { + // NON-BLOCKING path: don't write to the cell yet — the user + // needs to pick a variant first via the A/B selector. The + // selectABTestVariant handler will persist their choice. + // The queue continues immediately without waiting. + // Source panel scrolling is driven by the webview based on + // which A/B test is currently displayed (not queued). + this.updateSingleCellTranslation(1.0); + + debug("LLM completion A/B variants sent (batch non-blocking, awaiting user selection)", { + cellId: currentCellId, + variantsCount: variants?.length, + }); + return ""; + } - // Do not update the cell value now; the frontend will apply the chosen variant - // Return an empty string for consistency with callers expecting a string + // BLOCKING path (single-cell): do not update the cell value now; + // the frontend will apply the chosen variant when the user selects one. + this.updateSingleCellTranslation(1.0); debug("LLM completion A/B variants sent", { cellId: currentCellId, variantsCount: variants?.length }); return ""; } // Otherwise, handle as a single completion using the first variant - const singleCompletion = (completionResult as any)?.variants?.[0] ?? ""; + const singleCompletion = completionResult?.variants?.[0] ?? ""; progress.report({ message: "Updating document...", increment: 40 }); diff --git a/src/providers/translationSuggestions/llmCompletion.ts b/src/providers/translationSuggestions/llmCompletion.ts index 17215eb4f..ae4eafa09 100644 --- a/src/providers/translationSuggestions/llmCompletion.ts +++ b/src/providers/translationSuggestions/llmCompletion.ts @@ -90,6 +90,8 @@ export interface LLMCompletionResult { isAttentionCheck?: boolean; correctIndex?: number; decoyCellId?: string; + /** Model identifiers for server-initiated model comparison A/B tests. */ + models?: string[]; } export async function llmCompletion( @@ -246,10 +248,10 @@ export async function llmCompletion( ); // Unified AB testing via registry with random test selection (global gating) - // A/B testing is disabled during batch operations (chapter autocomplete, batch transcription) - // to avoid interrupting the user with variant selection UI + // A/B tests can fire during batch operations — the caller queues them + // non-blockingly for user selection const extConfig = vscode.workspace.getConfiguration("codex-editor-extension"); - const abEnabled = Boolean(extConfig.get("abTestingEnabled") ?? true) && !isBatchOperation; + const abEnabled = Boolean(extConfig.get("abTestingEnabled") ?? true); const abProbabilityRaw = extConfig.get("abTestingProbability"); const abProbability = Math.max(0, Math.min(1, typeof abProbabilityRaw === "number" ? abProbabilityRaw : 0.15)); const randomValue = Math.random(); @@ -260,9 +262,7 @@ export async function llmCompletion( } if (!triggerAB && completionConfig.debugMode) { - if (isBatchOperation) { - console.debug(`[llmCompletion] A/B testing disabled during batch operation`); - } else if (!abEnabled) { + if (!abEnabled) { console.debug(`[llmCompletion] A/B testing disabled in settings`); } else { console.debug(`[llmCompletion] A/B test not triggered (random ${randomValue.toFixed(3)} >= probability ${abProbability})`); @@ -313,10 +313,33 @@ export async function llmCompletion( } } - // A/B testing not triggered (or failed): call LLM once, return two identical variants - const completion = await callLLM(messages, completionConfig, token); + // A/B testing not triggered (or failed): call LLM with ab_eligible flag + // so the server can optionally return a multi-model A/B test response. + const llmResult = await callLLM(messages, completionConfig, token, /* abEligible */ true); const allowHtml = Boolean(completionConfig.allowHtmlPredictions); + // If the server returned a multi-model A/B test, build the result from it + if (llmResult.abTest) { + const serverVariants = llmResult.abTest.variants.map((txt) => + postProcessABTestResult(txt, allowHtml, returnHTML) + ); + if (completionConfig.debugMode) { + console.debug( + `[llmCompletion] Server returned model A/B test: models=${llmResult.abTest.models.join(", ")}, variants=${serverVariants.length}` + ); + } + return { + variants: serverVariants, + isABTest: true, + testId: `${currentCellId}-model-${Date.now()}`, + testName: "model_comparison", + models: llmResult.abTest.models, + }; + } + + // Standard single-completion path + const completion = llmResult.text; + // Preserve multi-line completions: strip any leading "->" markers per line, then join with
const lines = (completion || "").split(/\r?\n/); const processed = lines diff --git a/src/test/suite/codexCellEditorProvider.test.ts b/src/test/suite/codexCellEditorProvider.test.ts index 19578b139..2d07b19cd 100644 --- a/src/test/suite/codexCellEditorProvider.test.ts +++ b/src/test/suite/codexCellEditorProvider.test.ts @@ -3667,7 +3667,7 @@ suite("CodexCellEditorProvider Test Suite", () => { const llmUtils = await import("../../utils/llmUtils"); const callLLMStub = sinon.stub(llmUtils, "callLLM").callsFake(async (messages: any[]) => { capturedMessages = messages; - return "Mocked LLM response"; + return { text: "Mocked LLM response" }; }); // Stub status bar item @@ -3809,7 +3809,7 @@ suite("CodexCellEditorProvider Test Suite", () => { // Mock callLLM const llmUtils = await import("../../utils/llmUtils"); - const callLLMStub = sinon.stub(llmUtils, "callLLM").resolves("Mocked response"); + const callLLMStub = sinon.stub(llmUtils, "callLLM").resolves({ text: "Mocked response" }); // Stub status bar and notebook reader const extModule = await import("../../extension"); @@ -3912,7 +3912,7 @@ suite("CodexCellEditorProvider Test Suite", () => { // Mock callLLM const llmUtils = await import("../../utils/llmUtils"); - const callLLMStub = sinon.stub(llmUtils, "callLLM").resolves("Mocked response"); + const callLLMStub = sinon.stub(llmUtils, "callLLM").resolves({ text: "Mocked response" }); // Stub status bar and notebook reader const extModule = await import("../../extension"); @@ -4016,7 +4016,7 @@ suite("CodexCellEditorProvider Test Suite", () => { const llmUtils = await import("../../utils/llmUtils"); const callLLMStub = sinon.stub(llmUtils, "callLLM").callsFake(async (messages: any[]) => { capturedMessages = messages; - return "Mocked response"; + return { text: "Mocked response" }; }); // Stub MetadataManager.getChatSystemMessage to return custom system message @@ -4161,7 +4161,7 @@ suite("CodexCellEditorProvider Test Suite", () => { const llmUtils = await import("../../utils/llmUtils"); const callLLMStub = sinon.stub(llmUtils, "callLLM").callsFake(async (messages: any[]) => { capturedMessages = messages; - return "Mocked response"; + return { text: "Mocked response" }; }); // Stub status bar and notebook reader @@ -4301,7 +4301,7 @@ suite("CodexCellEditorProvider Test Suite", () => { // Mock callLLM to capture messages const llmUtils = await import("../../utils/llmUtils"); const callLLMStub = sinon.stub(llmUtils, "callLLM").callsFake(async (messages: any[]) => { - return "Mocked response"; + return { text: "Mocked response" }; }); // Stub status bar and notebook reader @@ -4450,7 +4450,7 @@ suite("CodexCellEditorProvider Test Suite", () => { let capturedMessages: any[] | null = null; const callLLMStub = sinon.stub(llmUtils, "callLLM").callsFake(async (messages: any[]) => { capturedMessages = messages; - return "Mocked response"; + return { text: "Mocked response" }; }); // Stub status bar and notebook reader @@ -4616,7 +4616,6 @@ suite("CodexCellEditorProvider Test Suite", () => { testId: "test-123", testName: "Example Count Test", selectedContent: "Test variant B", - selectionTimeMs: 1500, totalVariants: 2, variants: ["15 examples", "30 examples"] } @@ -4669,7 +4668,6 @@ suite("CodexCellEditorProvider Test Suite", () => { selectedIndex: 0, testId: "test-789", testName: "Test Name", - selectionTimeMs: 2000, totalVariants: 2, names: ["variant-a", "variant-b"] }; @@ -4682,7 +4680,6 @@ suite("CodexCellEditorProvider Test Suite", () => { testResult.testId, testResult.cellId, testResult.selectedIndex, - testResult.selectionTimeMs, testResult.names, undefined // Skip testName to prevent analytics call ); diff --git a/src/test/suite/validatedOnlyExamples.test.ts b/src/test/suite/validatedOnlyExamples.test.ts index 1e78ef4b8..6417148b8 100644 --- a/src/test/suite/validatedOnlyExamples.test.ts +++ b/src/test/suite/validatedOnlyExamples.test.ts @@ -157,7 +157,7 @@ suite("Validated-only examples behavior", () => { // Stub callLLM to avoid network and return deterministic strings for AB variants const llmUtils = await import("../../utils/llmUtils"); - const callStub = sinon.stub(llmUtils, "callLLM").resolves("PREDICTED"); + const callStub = sinon.stub(llmUtils, "callLLM").resolves({ text: "PREDICTED" }); // Stub status bar item used by llmCompletion const extModule = await import("../../extension"); diff --git a/src/utils/abTestingSetup.ts b/src/utils/abTestingSetup.ts index 13eb2d8c4..0b915079f 100644 --- a/src/utils/abTestingSetup.ts +++ b/src/utils/abTestingSetup.ts @@ -45,7 +45,8 @@ async function generateCompletionFromPairs( "source-and-target" ); - return await callLLM(msgs, ctx.completionConfig, ctx.token); + const result = await callLLM(msgs, ctx.completionConfig, ctx.token); + return result.text; } export function initializeABTesting() { diff --git a/src/utils/abTestingUtils.ts b/src/utils/abTestingUtils.ts index 85d7dae55..79efef303 100644 --- a/src/utils/abTestingUtils.ts +++ b/src/utils/abTestingUtils.ts @@ -5,7 +5,6 @@ export async function recordVariantSelection( testId: string, cellId: string, selectedIndex: number, - selectionTimeMs: number, names?: string[], testName?: string ): Promise { @@ -38,7 +37,6 @@ export async function recordAttentionCheckResult(args: { testId: string; cellId: string; passed: boolean; - selectionTimeMs: number; correctIndex?: number; decoyCellId?: string; }): Promise { diff --git a/src/utils/llmUtils.ts b/src/utils/llmUtils.ts index 85fddca98..6c318e5a2 100644 --- a/src/utils/llmUtils.ts +++ b/src/utils/llmUtils.ts @@ -5,57 +5,218 @@ import { getAuthApi } from "../extension"; import * as vscode from "vscode"; import { MetadataManager } from "./metadataManager"; +/** Result returned by callLLM. Always contains `text`; when the server runs a + * model A/B test it also populates `abTest` with the multi-variant payload. */ +export interface LLMCallResult { + /** Single completion text (normal path, or first variant for A/B). */ + text: string; + /** Present only when the server returns a multi-model A/B test response. */ + abTest?: { + variants: string[]; + models: string[]; + }; +} + +/** Shape of the server's A/B test JSON response. */ +interface ServerABTestResponse { + variants: string[]; + models: string[]; + is_ab_test: true; +} + +/** + * Resolves the Frontier LLM endpoint and auth token. + * Extracted so both the OpenAI-SDK path and the direct-fetch path can reuse it. + */ +async function resolveFrontierAuth(config: CompletionConfig): Promise<{ + endpoint: string; + authBearerToken: string | undefined; + isFrontierEndpoint: boolean; +}> { + const isFrontierEndpoint = config.endpoint.includes("frontierrnd.com"); + let llmEndpoint: string | undefined; + let authBearerToken: string | undefined; + + const hasCustomApiKey = config.apiKey && config.apiKey.trim().length > 0; + const shouldUseFrontierAuth = isFrontierEndpoint || !hasCustomApiKey; + + if (shouldUseFrontierAuth) { + try { + const frontierApi = getAuthApi(); + if (frontierApi) { + llmEndpoint = await frontierApi.getLlmEndpoint(); + authBearerToken = await frontierApi.authProvider.getToken(); + } + } catch (error) { + console.debug("Could not get LLM endpoint from auth API:", error); + } + + if (llmEndpoint) { + config.endpoint = llmEndpoint; + } + } + + return { endpoint: config.endpoint, authBearerToken, isFrontierEndpoint }; +} + +/** + * Makes a direct fetch() call to the LLM endpoint with `ab_eligible: true`. + * If the server returns an A/B test response, returns the multi-variant payload. + * Otherwise parses the standard OpenAI-compatible JSON and returns a single completion. + */ +async function fetchWithABEligible( + messages: ChatMessage[], + config: CompletionConfig, + endpoint: string, + authBearerToken: string | undefined, + isFrontierEndpoint: boolean, + cancellationToken?: vscode.CancellationToken +): Promise { + const model = "default"; + const body: Record = { + model, + messages: messages.map((m) => ({ role: m.role, content: m.content })), + ab_eligible: true, + }; + + const headers: Record = { "Content-Type": "application/json" }; + if (authBearerToken) { + headers["Authorization"] = `Bearer ${authBearerToken}`; + } else if (!isFrontierEndpoint && config.apiKey) { + headers["Authorization"] = `Bearer ${config.apiKey}`; + } + + // Build the URL — the endpoint may already include /chat/completions or just a base + let url = endpoint; + if (!url.endsWith("/chat/completions")) { + url = url.replace(/\/+$/, "") + "/chat/completions"; + } + + const abortController = new AbortController(); + let cancellationListener: vscode.Disposable | undefined; + if (cancellationToken) { + cancellationListener = cancellationToken.onCancellationRequested(() => { + abortController.abort(); + }); + } + + try { + const response = await fetch(url, { + method: "POST", + headers, + body: JSON.stringify(body), + signal: abortController.signal, + }); + + if (!response.ok) { + const status = response.status; + if (status === 401) { + try { + const now = Date.now(); + const key = "codex.lastAuthErrorTs"; + const last = (globalThis as unknown as Record)[key]; + if (!last || now - last > 5000) { + vscode.window.showErrorMessage( + "Authentication failed for LLM. Set an API key or sign in to your LLM provider." + ); + (globalThis as unknown as Record)[key] = now; + } + } catch { + // no-op + } + throw new vscode.CancellationError(); + } + throw new Error(`LLM request failed with status ${status}`); + } + + const json = await response.json() as Record; + + // Check for server A/B test response + if ( + json.is_ab_test === true && + Array.isArray(json.variants) && + Array.isArray(json.models) + ) { + const abResponse = json as unknown as ServerABTestResponse; + return { + text: abResponse.variants[0]?.trim() ?? "", + abTest: { + variants: abResponse.variants, + models: abResponse.models, + }, + }; + } + + // Standard OpenAI-compatible response + const choices = json.choices as Array<{ message?: { content?: string } }> | undefined; + if (choices && choices.length > 0 && choices[0].message) { + return { text: choices[0].message.content?.trim() ?? "" }; + } + + throw new Error("Unexpected response format from the LLM; callLLM() A/B fetch failed"); + } catch (error: unknown) { + const err = error as { name?: string }; + if ( + err.name === "AbortError" || + cancellationToken?.isCancellationRequested + ) { + throw new vscode.CancellationError(); + } + if (error instanceof vscode.CancellationError) { + throw error; + } + throw error; + } finally { + cancellationListener?.dispose(); + } +} + /** * Calls the Language Model (LLM) with the given messages and configuration. * * @param messages - An array of ChatMessage objects representing the conversation history. * @param config - The CompletionConfig object containing LLM configuration settings. * @param cancellationToken - Optional cancellation token to cancel the request - * @returns A Promise that resolves to the LLM's response as a string. + * @param abEligible - When true, signals the server that this request is eligible for + * a multi-model A/B test. Uses direct fetch() instead of the OpenAI SDK so it can + * handle the custom server response shape. + * @returns A Promise that resolves to an LLMCallResult. * @throws Error if the LLM response is unexpected or if there's an error during the API call. */ export async function callLLM( messages: ChatMessage[], config: CompletionConfig, - cancellationToken?: vscode.CancellationToken -): Promise { + cancellationToken?: vscode.CancellationToken, + abEligible: boolean = false +): Promise { try { // Check for cancellation before starting if (cancellationToken?.isCancellationRequested) { throw new vscode.CancellationError(); } - // Check if using Frontier endpoint (requires Frontier auth, not API key) - const isFrontierEndpoint = config.endpoint.includes("frontierrnd.com"); - let llmEndpoint: string | undefined; - let authBearerToken: string | undefined; - - // Use Frontier auth if: using Frontier endpoint OR no custom API key - const hasCustomApiKey = config.apiKey && config.apiKey.trim().length > 0; - const shouldUseFrontierAuth = isFrontierEndpoint || !hasCustomApiKey; - - if (shouldUseFrontierAuth) { - try { - const frontierApi = getAuthApi(); - if (frontierApi) { - llmEndpoint = await frontierApi.getLlmEndpoint(); - authBearerToken = await frontierApi.authProvider.getToken(); - } - } catch (error) { - console.debug("Could not get LLM endpoint from auth API:", error); - } - - if (llmEndpoint) { - config.endpoint = llmEndpoint; - } - } + // Resolve endpoint and auth once for both code paths + const { endpoint, authBearerToken, isFrontierEndpoint } = + await resolveFrontierAuth(config); - // Check for cancellation before creating OpenAI client + // Check for cancellation after auth resolution if (cancellationToken?.isCancellationRequested) { throw new vscode.CancellationError(); } - // For Frontier endpoint, use auth token; for other endpoints, use API key + // --- A/B eligible path: direct fetch() so we can handle the custom response shape --- + if (abEligible) { + return await fetchWithABEligible( + messages, + config, + endpoint, + authBearerToken, + isFrontierEndpoint, + cancellationToken + ); + } + + // --- Standard path: OpenAI SDK --- const openai = new OpenAI({ apiKey: isFrontierEndpoint ? "" : config.apiKey, baseURL: config.endpoint, @@ -67,7 +228,7 @@ export async function callLLM( }); const model = "default"; - if ((config as any)?.debugMode) { + if (config.debugMode) { console.debug("[callLLM] model", model); } @@ -108,17 +269,18 @@ export async function callLLM( completion.choices.length > 0 && completion.choices[0].message ) { - return completion.choices[0].message.content?.trim() ?? ""; + return { text: completion.choices[0].message.content?.trim() ?? "" }; } else { throw new Error( "Unexpected response format from the LLM; callLLM() failed - case 1" ); } - } catch (error: any) { + } catch (error: unknown) { cleanup(); + const err = error as { name?: string }; // Check if the error is due to cancellation - if (error.name === 'AbortError' || cancellationToken.isCancellationRequested) { + if (err.name === 'AbortError' || cancellationToken.isCancellationRequested) { throw new vscode.CancellationError(); } @@ -137,31 +299,32 @@ export async function callLLM( completion.choices.length > 0 && completion.choices[0].message ) { - return completion.choices[0].message.content?.trim() ?? ""; + return { text: completion.choices[0].message.content?.trim() ?? "" }; } else { throw new Error( "Unexpected response format from the LLM; callLLM() failed - case 1" ); } } - } catch (error: any) { + } catch (error: unknown) { if (error instanceof vscode.CancellationError) { throw error; // Re-throw cancellation errors as-is } - const status = (error?.response?.status ?? error?.status) as number | undefined; - const isAuthError = status === 401 || String(error?.message || "").includes("401"); + const err = error as { response?: { status?: number }; status?: number; message?: string }; + const status = (err?.response?.status ?? err?.status) as number | undefined; + const isAuthError = status === 401 || String(err?.message || "").includes("401"); if (isAuthError) { // Throttle the auth error toast try { const now = Date.now(); const key = "codex.lastAuthErrorTs"; - const last = (globalThis as any)[key] as number | undefined; + const last = (globalThis as unknown as Record)[key]; if (!last || now - last > 5000) { vscode.window.showErrorMessage( "Authentication failed for LLM. Set an API key or sign in to your LLM provider." ); - (globalThis as any)[key] = now; + (globalThis as unknown as Record)[key] = now; } } catch { // no-op @@ -200,7 +363,7 @@ export async function performReflection( systemContent += "Provide a grade from 0 to 100 where 0 is the lowest grade and 100 is the highest grade and a grade comment.\n"; - const response = await callLLM( + const result = await callLLM( [ { role: "system", @@ -215,13 +378,13 @@ export async function performReflection( cancellationToken ); - return response; + return result.text; } async function generateSummary(improvements: Promise[]): Promise { const results = await Promise.all(improvements); const summarizedContent = results.join("\n\n"); - const summary = await callLLM( + const result = await callLLM( [ { role: "system", @@ -237,7 +400,7 @@ export async function performReflection( config, cancellationToken ); - return summary.trim(); + return result.text.trim(); } async function implementImprovements( @@ -260,7 +423,7 @@ export async function performReflection( ], config, cancellationToken - ); + ).then((r) => r.text); }); return await improvedText; } catch (error) { @@ -284,9 +447,9 @@ export async function performReflection( config, cancellationToken ) - .then((distilledText) => { + .then((result) => { // Some basic post-processing to remove any trailing whitespace - return distilledText.trim(); + return result.text.trim(); }) .catch((error) => { console.error("Error implementing improvements:", error); diff --git a/types/index.d.ts b/types/index.d.ts index 607ac7ba2..9a8b996dd 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -766,11 +766,12 @@ export type EditorPostMessages = cellId: string; selectedIndex: number; testId: string; - selectionTimeMs: number; totalVariants?: number; selectedContent?: string; testName?: string; variants?: string[]; + /** Model identifiers for server-initiated model comparison tests. */ + models?: string[]; }; } | { command: "openLoginFlow"; } @@ -2133,7 +2134,7 @@ type EditorReceiveMessages = } | { type: "providerUpdatesTextDirection"; textDirection: "ltr" | "rtl"; } | { type: "providerSendsLLMCompletionResponse"; content: { completion: string; cellId: string; }; } - | { type: "providerSendsABTestVariants"; content: { variants: string[]; cellId: string; testId: string; testName?: string; }; } + | { type: "providerSendsABTestVariants"; content: { variants: string[]; cellId: string; testId: string; testName?: string; models?: string[]; }; } | { type: "jumpToSection"; content: string; } | { type: "providerUpdatesNotebookMetadataForWebview"; content: CustomNotebookMetadata; } | { type: "updateVideoUrlInWebview"; content: string; } diff --git a/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx b/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx index 46eda1d64..8d4564e8f 100755 --- a/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx +++ b/webviews/codex-webviews/src/CodexCellEditor/CodexCellEditor.tsx @@ -24,6 +24,7 @@ import SourceCellContext from "./contextProviders/SourceCellContext"; import DuplicateCellResolver from "./DuplicateCellResolver"; import TimelineEditor from "./TimelineEditor"; import VideoTimelineEditor from "./VideoTimelineEditor"; +import { ABTestQueueItem } from "./abTestTypes"; import { getCellValueData, @@ -356,21 +357,9 @@ const CodexCellEditor: React.FC = () => { // Add correction editor mode state const [isCorrectionEditorMode, setIsCorrectionEditorMode] = useState(false); - // A/B testing state - const [abTestState, setAbTestState] = useState<{ - isActive: boolean; - variants: string[]; - cellId: string; - testId: string; - testName?: string; - names?: string[]; - abProbability?: number; - }>({ - isActive: false, - variants: [], - cellId: "", - testId: "", - }); + // A/B testing queue: items accumulate during batch, shown one at a time + const [abTestQueue, setAbTestQueue] = useState([]); + const currentABTest = abTestQueue.length > 0 ? abTestQueue[0] : null; const abTestOriginalContentRef = useRef>(new Map()); // Acquire VS Code API once at component initialization @@ -588,53 +577,49 @@ const CodexCellEditor: React.FC = () => { ); // A/B test variant selection handler - const handleVariantSelected = (selectedIndex: number, selectionTimeMs: number) => { - if (!abTestState.isActive) return; + const handleVariantSelected = (selectedIndex: number) => { + if (!currentABTest) return; - const selectedVariant = abTestState.variants[selectedIndex]; + const selectedVariant = currentABTest.variants[selectedIndex]; debug("ab-test", `User selected variant ${selectedIndex}:`, selectedVariant); // Apply the selected variant applyVariantToCell( - abTestState.cellId, + currentABTest.cellId, selectedVariant, - abTestState.testId, + currentABTest.testId, selectedIndex, - abTestState.variants.length, - selectionTimeMs, - abTestState.testName, - abTestState.names + currentABTest.variants.length, + currentABTest.testName, + currentABTest.names, + currentABTest.models ); // If this was a recovery selection, we're done with the original content snapshot. - if (abTestState.testName === "Recovery" || abTestState.testId.includes("-recovery-")) { - abTestOriginalContentRef.current.delete(abTestState.cellId); - } - - // Casual confirmation with variant name if available - const variantName = abTestState.variants?.[selectedIndex]; - if (variantName) { - vscode.postMessage({ - command: "showInfo", - text: `Applied translation from ${variantName}.`, - } as any); + if (currentABTest.testName === "Recovery" || currentABTest.testId.includes("-recovery-")) { + abTestOriginalContentRef.current.delete(currentABTest.cellId); } - // Keep A/B modal open to show names and stats; user can close manually - // Do not allow re-vote (selector component already blocks further clicks) + // Advance queue — next test appears automatically + setAbTestQueue((prev) => prev.slice(1)); }; const handleDismissABTest = () => { debug("ab-test", "A/B test dismissed"); - setAbTestState({ - isActive: false, - variants: [], - cellId: "", - testId: "", - testName: undefined, - }); + // Skip current test, advance to next + setAbTestQueue((prev) => prev.slice(1)); }; + // Scroll the source panel to the cell of the currently displayed A/B test + useEffect(() => { + if (currentABTest?.cellId) { + vscode.postMessage({ + command: "setCurrentIdToGlobalState", + content: { currentLineId: currentABTest.cellId }, + } as EditorPostMessages); + } + }, [currentABTest?.testId]); + // Helper function to apply a variant to a cell const applyVariantToCell = ( cellId: string, @@ -642,9 +627,9 @@ const CodexCellEditor: React.FC = () => { testId: string | undefined, selectedIndex: number, totalVariants: number, - selectionTimeMs: number = 0, testName?: string, - names?: string[] + names?: string[], + models?: string[] ) => { debug("ab-test", `Applying variant ${selectedIndex} to cell ${cellId}:`, variant); @@ -694,9 +679,9 @@ const CodexCellEditor: React.FC = () => { selectedIndex, selectedContent: variant, testId, - testName: testName || abTestState.testName, - selectionTimeMs: selectionTimeMs || 0, - variants: names || abTestState.variants, + testName: testName || currentABTest?.testName, + variants: names || currentABTest?.variants, + models: models || currentABTest?.models, }, } as EditorPostMessages); } @@ -1382,7 +1367,7 @@ const CodexCellEditor: React.FC = () => { }, setAudioAttachments: setAudioAttachments, showABTestVariants: (data) => { - const { variants, cellId, testId, testName, names, abProbability } = data as any; + const { variants, cellId, testId, testName, names, abProbability, models } = data as ABTestQueueItem; const count = Array.isArray(variants) ? variants.length : 0; debug("ab-test", "Received A/B test variants:", { cellId, count }); @@ -1424,21 +1409,16 @@ const CodexCellEditor: React.FC = () => { } } - // Show A/B selector UI - setAbTestState({ - isActive: true, - variants, - cellId, - testId, - testName, - names, - abProbability, - }); + // Queue A/B test — shown one at a time, next appears after selection + setAbTestQueue((prev) => [ + ...prev, + { variants, cellId, testId, testName, names, abProbability, models }, + ]); return; } // Otherwise, auto-apply first variant quietly (no modal) - applyVariantToCell(cellId, variants[0], testId, 0, count, 0, testName, names); + applyVariantToCell(cellId, variants[0], testId, 0, count, testName, names); }, // Milestone-based pagination handlers @@ -3098,14 +3078,16 @@ const CodexCellEditor: React.FC = () => { )} - {/* A/B Test Variant Selection Modal */} - {abTestState.isActive && ( + {/* A/B Test Variant Selection Modal — shows first item in queue */} + {currentABTest && ( handleVariantSelected(idx, ms)} + key={currentABTest.testId} + variants={currentABTest.variants} + cellId={currentABTest.cellId} + testId={currentABTest.testId} + queuePosition={1} + queueTotal={abTestQueue.length} + onVariantSelected={handleVariantSelected} onDismiss={handleDismissABTest} /> )} diff --git a/webviews/codex-webviews/src/CodexCellEditor/EditorWithABTesting.tsx b/webviews/codex-webviews/src/CodexCellEditor/EditorWithABTesting.tsx index 95cbf87e8..4a52f15b3 100644 --- a/webviews/codex-webviews/src/CodexCellEditor/EditorWithABTesting.tsx +++ b/webviews/codex-webviews/src/CodexCellEditor/EditorWithABTesting.tsx @@ -22,20 +22,13 @@ import { VSCodeButton } from "@vscode/webview-ui-toolkit/react"; import { processHtmlContent, updateFootnoteNumbering } from "./footnoteUtils"; import { ABTestVariantSelector } from "./components/ABTestVariantSelector"; import { useMessageHandler } from "./hooks/useCentralizedMessageDispatcher"; +import { ABTestQueueItem } from "./abTestTypes"; const icons: any = Quill.import("ui/icons"); const vscode: any = (window as any).vscodeApi; registerQuillSpellChecker(Quill, vscode); -interface ABTestState { - isActive: boolean; - variants: string[]; - cellId: string; - testId: string; - testName?: string; -} - export interface EditorRef { quillRef: React.RefObject; printContents: () => void; @@ -87,57 +80,36 @@ const EditorWithABTesting = forwardRef((props, ref) => { const [spellChecker, setSpellChecker] = useState(null); const [isEnabled, setIsEnabled] = useState(true); const [showHistory, setShowHistory] = useState(false); - const [abTestState, setAbTestState] = useState({ - isActive: false, - variants: [], - cellId: "", - testId: "", - testName: "", - }); + // A/B test queue: items accumulate during batch, shown one at a time + const [abTestQueue, setAbTestQueue] = useState([]); - // A/B Testing handlers - const handleShowABTestVariants = (data: { - variants: string[]; - cellId: string; - testId: string; - }) => { - setAbTestState({ - isActive: true, - variants: data.variants, - cellId: data.cellId, - testId: data.testId, - }); - }; + const currentABTest = abTestQueue.length > 0 ? abTestQueue[0] : null; - const handleVariantSelected = (selectedIndex: number, selectionTimeMs: number) => { - if (!abTestState.isActive) return; + const handleVariantSelected = (selectedIndex: number) => { + if (!currentABTest) return; // Send selection to backend - backend handles all logic vscode.postMessage({ command: "selectABTestVariant", content: { - cellId: abTestState.cellId, + cellId: currentABTest.cellId, selectedIndex, - testId: abTestState.testId, - testName: abTestState.testName, - selectionTimeMs, - totalVariants: abTestState.variants?.length ?? 0, - variants: abTestState.variants, + testId: currentABTest.testId, + testName: currentABTest.testName, + totalVariants: currentABTest.variants?.length ?? 0, + variants: currentABTest.variants, + // Pass model identifiers for server-initiated model comparison tests + ...(currentABTest.models && currentABTest.models.length > 0 ? { models: currentABTest.models } : {}), }, } as EditorPostMessages); - // Close the selector - backend will send new one if needed - handleDismissABTest(); + // Remove completed test from the front of the queue — next one shows automatically + setAbTestQueue((prev) => prev.slice(1)); }; const handleDismissABTest = () => { - setAbTestState({ - isActive: false, - variants: [], - cellId: "", - testId: "", - testName: "", - }); + // Dismiss current test without selecting (skip it) + setAbTestQueue((prev) => prev.slice(1)); }; const updateHeaderLabel = () => { @@ -171,25 +143,19 @@ const EditorWithABTesting = forwardRef((props, ref) => { ); } } else if (event.data.type === "providerSendsABTestVariants") { - // Handle A/B test variants: always show selector for user choice - const { variants, cellId, testId, testName } = event.data.content as { + // Handle A/B test variants: queue them up, show one at a time + const { variants, cellId, testId, testName, models } = event.data.content as { variants: string[]; cellId: string; testId: string; testName?: string; + models?: string[]; }; - if ( - cellId === props.currentLineId && - Array.isArray(variants) && - variants.length > 1 - ) { - setAbTestState({ - isActive: true, - variants, - cellId, - testId, - testName, - }); + if (Array.isArray(variants) && variants.length > 1) { + setAbTestQueue((prev) => [ + ...prev, + { variants, cellId, testId, testName, models }, + ]); } } updateHeaderLabel(); @@ -264,13 +230,15 @@ const EditorWithABTesting = forwardRef((props, ref) => { }} /> - {/* A/B Testing Overlay */} - {abTestState.isActive && ( + {/* A/B Testing Overlay — shows first item in queue */} + {currentABTest && ( diff --git a/webviews/codex-webviews/src/CodexCellEditor/__tests___/ABTestVariantSelector.test.tsx b/webviews/codex-webviews/src/CodexCellEditor/__tests___/ABTestVariantSelector.test.tsx index 306227bab..4850a623e 100644 --- a/webviews/codex-webviews/src/CodexCellEditor/__tests___/ABTestVariantSelector.test.tsx +++ b/webviews/codex-webviews/src/CodexCellEditor/__tests___/ABTestVariantSelector.test.tsx @@ -61,8 +61,7 @@ describe("ABTestVariantSelector", () => { await waitFor(() => { expect(defaultProps.onVariantSelected).toHaveBeenCalledWith( - 0, - expect.any(Number) + 0 ); }); }); @@ -75,8 +74,7 @@ describe("ABTestVariantSelector", () => { await waitFor(() => { expect(defaultProps.onVariantSelected).toHaveBeenCalledWith( - 1, - expect.any(Number) + 1 ); }); }); @@ -103,7 +101,7 @@ describe("ABTestVariantSelector", () => { }); }); - it("should record selection time when variant is clicked", async () => { + it("should emit selected index when variant is clicked", async () => { render(); const variant = screen.getByText("Translation variant A"); @@ -111,15 +109,9 @@ describe("ABTestVariantSelector", () => { await waitFor(() => { expect(defaultProps.onVariantSelected).toHaveBeenCalledWith( - expect.any(Number), expect.any(Number) ); }); - - const call = defaultProps.onVariantSelected.mock.calls[0]; - const selectionTime = call[1]; - expect(selectionTime).toBeGreaterThanOrEqual(0); - expect(selectionTime).toBeLessThan(10000); }); it("should pass correct index to onVariantSelected", async () => { @@ -130,8 +122,7 @@ describe("ABTestVariantSelector", () => { await waitFor(() => { expect(defaultProps.onVariantSelected).toHaveBeenCalledWith( - 1, - expect.any(Number) + 1 ); }); }); diff --git a/webviews/codex-webviews/src/CodexCellEditor/__tests___/AttentionCheckRecovery.test.tsx b/webviews/codex-webviews/src/CodexCellEditor/__tests___/AttentionCheckRecovery.test.tsx index 4d7261501..b6ac72ee3 100644 --- a/webviews/codex-webviews/src/CodexCellEditor/__tests___/AttentionCheckRecovery.test.tsx +++ b/webviews/codex-webviews/src/CodexCellEditor/__tests___/AttentionCheckRecovery.test.tsx @@ -88,7 +88,7 @@ describe("Attention Check Recovery Flow", () => { expect(screen.getByText(decoyTranslation)).toBeInTheDocument(); }); - it("should call onVariantSelected with selection time when variant is clicked", async () => { + it("should call onVariantSelected with selected index when variant is clicked", async () => { const onVariantSelected = vi.fn(); const props = { variants: [correctTranslation, decoyTranslation], @@ -105,8 +105,7 @@ describe("Attention Check Recovery Flow", () => { await waitFor(() => { expect(onVariantSelected).toHaveBeenCalledWith( - expect.any(Number), // index - expect.any(Number) // selectionTimeMs + expect.any(Number) // index ); }); }); @@ -231,7 +230,7 @@ describe("Attention Check Recovery Flow", () => { fireEvent.click(correctOption); await waitFor(() => { - expect(onVariantSelected).toHaveBeenCalledWith(0, expect.any(Number)); + expect(onVariantSelected).toHaveBeenCalledWith(0); }); // Verify the "Thanks" message appears @@ -258,7 +257,7 @@ describe("Attention Check Recovery Flow", () => { fireEvent.click(decoyOption); await waitFor(() => { - expect(onVariantSelected).toHaveBeenCalledWith(1, expect.any(Number)); + expect(onVariantSelected).toHaveBeenCalledWith(1); }); }); diff --git a/webviews/codex-webviews/src/CodexCellEditor/abTestTypes.ts b/webviews/codex-webviews/src/CodexCellEditor/abTestTypes.ts new file mode 100644 index 000000000..79a8aef0a --- /dev/null +++ b/webviews/codex-webviews/src/CodexCellEditor/abTestTypes.ts @@ -0,0 +1,10 @@ +export interface ABTestQueueItem { + variants: string[]; + cellId: string; + testId: string; + testName?: string; + names?: string[]; + abProbability?: number; + /** Model identifiers for server-initiated model comparison tests. */ + models?: string[]; +} diff --git a/webviews/codex-webviews/src/CodexCellEditor/components/ABTestVariantSelector.css b/webviews/codex-webviews/src/CodexCellEditor/components/ABTestVariantSelector.css index 9c68350e0..b53a5bac0 100644 --- a/webviews/codex-webviews/src/CodexCellEditor/components/ABTestVariantSelector.css +++ b/webviews/codex-webviews/src/CodexCellEditor/components/ABTestVariantSelector.css @@ -43,6 +43,17 @@ color: var(--vscode-descriptionForeground); } +.ab-test-queue-badge { + display: inline-block; + font-size: 12px; + font-weight: 600; + color: var(--vscode-button-foreground); + background-color: var(--vscode-button-background); + padding: 2px 10px; + border-radius: 12px; + margin-bottom: 6px; +} + .ab-test-variants { padding: 16px 24px; display: flex; diff --git a/webviews/codex-webviews/src/CodexCellEditor/components/ABTestVariantSelector.tsx b/webviews/codex-webviews/src/CodexCellEditor/components/ABTestVariantSelector.tsx index ecad348b4..359c32e3e 100644 --- a/webviews/codex-webviews/src/CodexCellEditor/components/ABTestVariantSelector.tsx +++ b/webviews/codex-webviews/src/CodexCellEditor/components/ABTestVariantSelector.tsx @@ -6,7 +6,9 @@ interface ABTestVariantSelectorProps { cellId: string; testId: string; headerOverride?: string; // Custom header text (e.g., for recovery after attention check) - onVariantSelected: (index: number, selectionTimeMs: number) => void; + queuePosition?: number; // 1-based index in the A/B test queue (e.g., 1) + queueTotal?: number; // Total tests in queue (e.g., 3) + onVariantSelected: (index: number) => void; onDismiss: () => void; } @@ -15,19 +17,19 @@ export const ABTestVariantSelector: React.FC = ({ cellId, testId, headerOverride, + queuePosition, + queueTotal, onVariantSelected, onDismiss }) => { - const [startTime] = useState(Date.now()); const [selectedIndex, setSelectedIndex] = useState(null); const [order] = useState(() => variants.map((_, i) => i).sort(() => Math.random() - 0.5)); const handleVariantSelect = (index: number) => { if (selectedIndex !== null) return; // Prevent double selection - const selectionTime = Date.now() - startTime; setSelectedIndex(index); - onVariantSelected(index, selectionTime); + onVariantSelected(index); }; const stripHtmlTags = (html: string): string => { @@ -43,6 +45,11 @@ export const ABTestVariantSelector: React.FC = ({
e.stopPropagation()}>

{headerOverride || (selectedIndex === null ? 'Choose Translation' : 'Result')}

+ {queueTotal != null && queueTotal > 1 && ( +
+ {queuePosition} of {queueTotal} +
+ )} {selectedIndex === null && !headerOverride ? (

Pick the translation that reads best for this context.

) : selectedIndex !== null ? (