Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/copilotSettings/copilotSettings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ export async function generateChatSystemMessage(

const prompt = `Generate a concise, one-paragraph set of linguistic instructions critical for a linguistically informed translator to keep in mind at all times when translating from ${sourceLanguage.refName} to ${targetLanguage.refName}. Keep it to a single plaintext paragraph. Note key lexicosemantic, information structuring, register-relevant and other key distinctions necessary for grammatical, natural text in ${targetLanguage.refName} if the starting place is ${sourceLanguage.refName}. ${htmlInstruction} Preserve original line breaks from <currentTask><source> by returning text with the same number of lines separated by newline characters. Do not include XML in your answer.`;

const response = await callLLM(
const result = await callLLM(
[
{
role: "user",
Expand All @@ -361,7 +361,7 @@ export async function generateChatSystemMessage(
llmConfig
);

return response;
return result.text;
} catch (error) {
debug("[generateChatSystemMessage] Error generating message:", error);
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,15 @@ const messageHandlers: Record<string, (ctx: MessageHandlerContext) => Promise<vo
const typedEvent = event as Extract<EditorPostMessages, { command: "llmCompletion"; }>;
debug("llmCompletion message received", { event, document, provider, webviewPanel });

// Fire-and-forget: record single-cell translation telemetry
import("../../utils/abTestingAnalytics").then(({ recordAbResult }) =>
recordAbResult({
category: "batch_vs_single",
options: ["single", "batch"],
winner: 0,
})
).catch(() => { /* analytics must never block translation */ });

const cellId = typedEvent.content.currentLineId;
const addContentToValue = typedEvent.content.addContentToValue;

Expand Down Expand Up @@ -1442,10 +1451,19 @@ const messageHandlers: Record<string, (ctx: MessageHandlerContext) => Promise<vo

selectABTestVariant: async ({ event, document, webviewPanel, provider }) => {
const typedEvent = event as Extract<EditorPostMessages, { command: "selectABTestVariant"; }>;
const { cellId, selectedIndex, selectedContent, testId, testName, selectionTimeMs, variants } = typedEvent.content || {};
const variantNames: string[] | undefined = variants;
const { cellId, selectedIndex, selectedContent, testId, testName, variants, models } = typedEvent.content || {};
const isRecovery = testName === "Recovery" || (typeof testId === "string" && testId.includes("-recovery-"));

// Decrement pending A/B test count so normal source highlighting can resume
if (provider.pendingABTestCount > 0) {
provider.pendingABTestCount--;
}

// For model comparison tests, use model names as the analytics options;
// otherwise fall back to the variant text (existing behavior).
const isModelComparison = testName === "model_comparison" && Array.isArray(models) && models.length > 0;
const variantNames: string[] | undefined = isModelComparison ? models : variants;

// Check if this was a pending attention check
const attentionCheck = getAttentionCheck(testId);

Expand All @@ -1459,7 +1477,6 @@ const messageHandlers: Record<string, (ctx: MessageHandlerContext) => Promise<vo
testId,
cellId,
passed: !pickedWrong,
selectionTimeMs,
correctIndex: attentionCheck.correctIndex,
decoyCellId: attentionCheck.decoyCellId
});
Expand All @@ -1481,17 +1498,18 @@ const messageHandlers: Record<string, (ctx: MessageHandlerContext) => Promise<vo
testName: "Recovery",
},
});
provider.pendingABTestCount++;
}
return;
}

// User picked correctly - apply and clear
clearAttentionCheck(testId);
} else {
// Regular A/B test
// Regular A/B test (including model comparison)
if (!isRecovery) {
const { recordVariantSelection } = await import("../../utils/abTestingUtils");
await recordVariantSelection(testId, cellId, selectedIndex, selectionTimeMs, variantNames, testName);
await recordVariantSelection(testId, cellId, selectedIndex, variantNames, testName);
}
}

Expand All @@ -1507,7 +1525,7 @@ const messageHandlers: Record<string, (ctx: MessageHandlerContext) => Promise<vo
}
}

debug(`A/B test feedback recorded: Cell ${cellId}, variant ${selectedIndex}, test ${testId}, took ${selectionTimeMs}ms`);
debug(`A/B test feedback recorded: Cell ${cellId}, variant ${selectedIndex}, test ${testId}`);
},

updateCellDisplayMode: async ({ event, document, webviewPanel, provider }) => {
Expand Down
92 changes: 70 additions & 22 deletions src/providers/codexCellEditorProvider/codexCellEditorProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import * as vscode from "vscode";
import { fetchCompletionConfig } from "@/utils/llmUtils";
import { CodexNotebookReader } from "../../serializer";
import { workspaceStoreListener } from "../../utils/workspaceEventListener";
import { llmCompletion } from "../translationSuggestions/llmCompletion";
import { llmCompletion, LLMCompletionResult } from "../translationSuggestions/llmCompletion";
import { CodexCellTypes, EditType } from "../../../types/enums";
import {
QuillCellContent,
Expand Down Expand Up @@ -143,6 +143,10 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider<Code
}[] = [];
private isProcessingQueue: boolean = false;

// When > 0, A/B tests are awaiting user selection — source highlighting
// is driven by the webview's A/B queue instead of normal cell navigation.
public pendingABTestCount: number = 0;

// New state for autocompletion process
public autocompletionState: {
isProcessing: boolean;
Expand Down Expand Up @@ -335,6 +339,15 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider<Code
// Only send highlight messages to source files when a codex file is active
const valueIsCodexFile = this.isCodexFile(value.uri);
if (valueIsCodexFile) {
// When A/B tests are queued during batch, let the webview's
// A/B queue drive source highlighting instead of batch navigation.
const suppressSourceHighlight =
this.pendingABTestCount > 0 &&
this.autocompletionState.isProcessing;
if (suppressSourceHighlight) {
debug("Suppressing source highlight during A/B test queue");
return;
}
debug("Processing codex file highlight");
// Send highlight using cellId (primary) or globalReferences (if available)
for (const [panelUri, panel] of this.webviewPanels.entries()) {
Expand Down Expand Up @@ -1641,6 +1654,15 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider<Code
// Send state to webview
this.broadcastAutocompletionState();

// Fire-and-forget: record batch translation telemetry (once per batch initiation)
import("../../utils/abTestingAnalytics").then(({ recordAbResult }) =>
recordAbResult({
category: "batch_vs_single",
options: ["single", "batch"],
winner: 1,
})
).catch(() => { /* analytics must never block translation */ });

// Determine if LLM is ready (API key or auth token). We still run transcriptions even if not ready.
let llmReady = true;
try {
Expand Down Expand Up @@ -2541,10 +2563,19 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider<Code
const sourceUri = getCorrespondingSourceUri(codexUri);

// Send highlight/clear messages and milestone jump to source files when a codex file is active
// When A/B tests are pending, source highlighting is driven by the
// webview's A/B queue (via setCurrentIdToGlobalState for the active test).
// Skip source-panel updates here so batch processing doesn't override it.
const suppressSourceHighlight = this.pendingABTestCount > 0 && this.autocompletionState.isProcessing;

for (const [panelUri, panel] of this.webviewPanels.entries()) {
const isSourceFile = this.isSourceText(panelUri);
// copy this to update target with merged cells
if (isSourceFile) {
if (suppressSourceHighlight) {
continue;
}

// Check if this is the matching source file
const isMatchingSource = sourceUri && panelUri === sourceUri.toString();

Expand Down Expand Up @@ -3386,7 +3417,7 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider<Code
new vscode.CancellationTokenSource().token;

// Determine if this is a batch operation (chapter autocomplete or multiple cells queued)
// A/B testing is disabled during batch operations to avoid interrupting the workflow
// During batch, A/B tests are non-blocking: variant[0] is auto-applied and the queue continues
const isBatchOperation = this.autocompletionState.isProcessing ||
(this.singleCellQueueState.isProcessing && this.singleCellQueueState.totalCells > 1);

Expand All @@ -3407,8 +3438,8 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider<Code
}

// If multiple variants are present, send to the webview for selection
if (completionResult && Array.isArray((completionResult as any).variants) && (completionResult as any).variants.length > 1) {
const { variants, testId, testName, isAttentionCheck, correctIndex, decoyCellId } = completionResult as any;
if (completionResult && Array.isArray(completionResult.variants) && completionResult.variants.length > 1) {
const { variants, testId, testName, isAttentionCheck, correctIndex, decoyCellId, models } = completionResult;

// If variants are identical (ignoring whitespace), treat as single completion
try {
Expand All @@ -3432,21 +3463,21 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider<Code
debug("Error comparing variants for identity; proceeding with A/B UI", { error: e });
}

if (webviewPanel) {
const actualTestId = testId || `${currentCellId}-${Date.now()}`;
const actualTestId = testId || `${currentCellId}-${Date.now()}`;

// If this is an attention check, register it so we can handle the response
if (isAttentionCheck && typeof correctIndex === 'number') {
const { registerAttentionCheck } = await import("./codexCellEditorMessagehandling");
registerAttentionCheck(actualTestId, {
cellId: currentCellId,
correctIndex,
correctVariant: variants[correctIndex],
decoyCellId,
});
console.log(`[Attention Check] Registered for testId ${actualTestId}, correctIndex ${correctIndex}`);
}
// If this is an attention check, register it so we can handle the response
if (isAttentionCheck && typeof correctIndex === 'number') {
const { registerAttentionCheck } = await import("./codexCellEditorMessagehandling");
registerAttentionCheck(actualTestId, {
cellId: currentCellId,
correctIndex,
correctVariant: variants[correctIndex],
decoyCellId,
});
console.log(`[Attention Check] Registered for testId ${actualTestId}, correctIndex ${correctIndex}`);
}

if (webviewPanel) {
// Send variants to webview - frontend doesn't need attention check details
this.postMessageToWebview(webviewPanel, {
type: "providerSendsABTestVariants",
Expand All @@ -3455,21 +3486,38 @@ export class CodexCellEditorProvider implements vscode.CustomEditorProvider<Code
cellId: currentCellId,
testId: actualTestId,
testName,
// Include model identifiers for server-initiated model comparison tests
...(Array.isArray(models) && models.length > 0 ? { models } : {}),
},
});
this.pendingABTestCount++;
}

// Mark single cell translation as complete so UI progress/spinners stop
this.updateSingleCellTranslation(1.0);
if (isBatchOperation) {
// NON-BLOCKING path: don't write to the cell yet — the user
// needs to pick a variant first via the A/B selector. The
// selectABTestVariant handler will persist their choice.
// The queue continues immediately without waiting.
// Source panel scrolling is driven by the webview based on
// which A/B test is currently displayed (not queued).
this.updateSingleCellTranslation(1.0);

debug("LLM completion A/B variants sent (batch non-blocking, awaiting user selection)", {
cellId: currentCellId,
variantsCount: variants?.length,
});
return "";
}

// Do not update the cell value now; the frontend will apply the chosen variant
// Return an empty string for consistency with callers expecting a string
// BLOCKING path (single-cell): do not update the cell value now;
// the frontend will apply the chosen variant when the user selects one.
this.updateSingleCellTranslation(1.0);
debug("LLM completion A/B variants sent", { cellId: currentCellId, variantsCount: variants?.length });
return "";
}

// Otherwise, handle as a single completion using the first variant
const singleCompletion = (completionResult as any)?.variants?.[0] ?? "";
const singleCompletion = completionResult?.variants?.[0] ?? "";

progress.report({ message: "Updating document...", increment: 40 });

Expand Down
39 changes: 31 additions & 8 deletions src/providers/translationSuggestions/llmCompletion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ export interface LLMCompletionResult {
isAttentionCheck?: boolean;
correctIndex?: number;
decoyCellId?: string;
/** Model identifiers for server-initiated model comparison A/B tests. */
models?: string[];
}

export async function llmCompletion(
Expand Down Expand Up @@ -246,10 +248,10 @@ export async function llmCompletion(
);

// Unified AB testing via registry with random test selection (global gating)
// A/B testing is disabled during batch operations (chapter autocomplete, batch transcription)
// to avoid interrupting the user with variant selection UI
// A/B tests can fire during batch operations — the caller queues them
// non-blockingly for user selection
const extConfig = vscode.workspace.getConfiguration("codex-editor-extension");
const abEnabled = Boolean(extConfig.get("abTestingEnabled") ?? true) && !isBatchOperation;
const abEnabled = Boolean(extConfig.get("abTestingEnabled") ?? true);
const abProbabilityRaw = extConfig.get<number>("abTestingProbability");
const abProbability = Math.max(0, Math.min(1, typeof abProbabilityRaw === "number" ? abProbabilityRaw : 0.15));
const randomValue = Math.random();
Expand All @@ -260,9 +262,7 @@ export async function llmCompletion(
}

if (!triggerAB && completionConfig.debugMode) {
if (isBatchOperation) {
console.debug(`[llmCompletion] A/B testing disabled during batch operation`);
} else if (!abEnabled) {
if (!abEnabled) {
console.debug(`[llmCompletion] A/B testing disabled in settings`);
} else {
console.debug(`[llmCompletion] A/B test not triggered (random ${randomValue.toFixed(3)} >= probability ${abProbability})`);
Expand Down Expand Up @@ -313,10 +313,33 @@ export async function llmCompletion(
}
}

// A/B testing not triggered (or failed): call LLM once, return two identical variants
const completion = await callLLM(messages, completionConfig, token);
// A/B testing not triggered (or failed): call LLM with ab_eligible flag
// so the server can optionally return a multi-model A/B test response.
const llmResult = await callLLM(messages, completionConfig, token, /* abEligible */ true);
const allowHtml = Boolean(completionConfig.allowHtmlPredictions);

// If the server returned a multi-model A/B test, build the result from it
if (llmResult.abTest) {
const serverVariants = llmResult.abTest.variants.map((txt) =>
postProcessABTestResult(txt, allowHtml, returnHTML)
);
if (completionConfig.debugMode) {
console.debug(
`[llmCompletion] Server returned model A/B test: models=${llmResult.abTest.models.join(", ")}, variants=${serverVariants.length}`
);
}
return {
variants: serverVariants,
isABTest: true,
testId: `${currentCellId}-model-${Date.now()}`,
testName: "model_comparison",
models: llmResult.abTest.models,
};
}

// Standard single-completion path
const completion = llmResult.text;

// Preserve multi-line completions: strip any leading "->" markers per line, then join with <br/>
const lines = (completion || "").split(/\r?\n/);
const processed = lines
Expand Down
Loading