Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,15 @@ export class ContextState {
*
* May also contain a single entry for applying Suggestions or when correction behavior
* is disabled.
* @param isApplyingSuggestion When true, alters behavior to better model application of suggestions.
* @param appliedSuggestionId When defined, notes the original transition ID corresponding to
* the applied suggestion.
* @returns
*/
analyzeTransition(
context: Context,
transformDistribution: Distribution<Transform>,
// overrides checks for token substitution that can fail for large applied suggestions.
isApplyingSuggestion?: boolean
appliedSuggestionId?: number
): ContextTransition {
const lexicalModel = this.model;

Expand Down Expand Up @@ -249,7 +250,8 @@ export class ContextState {
// into subsets.
const bestProb = transformDistribution.reduce((best, curr) => Math.max(best, curr.p), 0);
// Should gain one per subsetBuilder.subsets entry.
const resultTokenization = baseTokenization.evaluateTransition(tokenizationAnalysis, lexicalModel, trueInput, bestProb);
const realignedTokenization = baseTokenization.realign(tokenizationAnalysis.alignment);
const resultTokenization = realignedTokenization.evaluateTransition(tokenizationAnalysis, trueInput.id, bestProb, appliedSuggestionId);

// ------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,15 @@ export class ContextTokenization {
* The sequence of tokens in the context represented by this instance.
*/
readonly tokens: ContextToken[];

/**
* The tokenization-transition metadata relating this instance to the most likely
* tokenization from a prior state.
* Denotes whether or not the transition to this tokenization added or deleted
* any tokens.
*/
readonly transitionEdits?: TransitionEdge;
readonly transitionEdits?: {
addedNewTokens: boolean,
removedOldTokens: boolean
};

/**
* The portion of edits from the true input keystroke that are not part of the
Expand All @@ -129,13 +133,18 @@ export class ContextTokenization {
constructor(tokens: ContextToken[], alignment: TransitionEdge, taillessTrueKeystroke: Transform);
constructor(
param1: ContextToken[] | ContextTokenization,
alignment?: TransitionEdge,
tokenizationPath?: TransitionEdge,
taillessTrueKeystroke?: Transform
) {
if(!(param1 instanceof ContextTokenization)) {
const tokens = param1;
this.tokens = [].concat(tokens);
this.transitionEdits = alignment;
if(tokenizationPath) {
this.transitionEdits = {
addedNewTokens: tokenizationPath?.inputs[0].sample.has(1) ?? false,
removedOldTokens: (tokenizationPath?.alignment.removedTokenCount ?? 0) > 0
}
}
this.taillessTrueKeystroke = taillessTrueKeystroke;
} else {
const priorToClone = param1;
Expand Down Expand Up @@ -489,30 +498,16 @@ export class ContextTokenization {

/**
* Given results from `precomputeTokenizationAfterInput`, this method will
* evaluate the pending transition in tokenization for all associated inputs
* realign this tokenization's range to match the incoming keystroke's context window
* while reusing as many correction-search intermediate results as possible.
* @param transitionEdge Batched results from one or more
* @param alignment The shared alignment metadata produced by one or more
* `precomputeTokenizationAfterInput` calls on this instance, all with the
* same alignment values.
* @param lexicalModel The active lexical model
* @param sourceInput The Transform associated with the keystroke triggering
* the transition.
* @param bestProbFromSet The probability of the single most likely input
* transform in the overall transformDistribution associated with the
* keystroke triggering the transition. It need not be represented by the
* TransitionEdge to be built.
* @returns
*/
evaluateTransition(
transitionEdge: TransitionEdge,
lexicalModel: LexicalModel,
sourceInput: Transform,
bestProbFromSet: number
): ContextTokenization {
const { alignment: alignment, inputs } = transitionEdge;
realign(alignment: TransitionEdgeAlignment): ContextTokenization {
const sliceIndex = alignment.edgeWindow.sliceIndex;
const baseTokenization = this.tokens.slice(sliceIndex);
let affectedToken: ContextToken;

const tokenization: ContextToken[] = [];

Expand Down Expand Up @@ -553,33 +548,71 @@ export class ContextTokenization {
tokenization.push(token);
}

return new ContextTokenization(this.tokens.slice(0, sliceIndex).concat(tokenization), null, this.taillessTrueKeystroke);
}

/**
* Given results from `precomputeTokenizationAfterInput`, this method will
* evaluate the pending transition in tokenization for all associated inputs
* while reusing as many correction-search intermediate results as possible.
* @param transitionEdge Batched results from one or more
* `precomputeTokenizationAfterInput` calls on this instance, all with the
* same alignment values.
* @param transitionId The id of the Transform associated with the keystroke
* triggering the transition.
* @param bestProbFromSet The probability of the single most likely input
* transform in the overall transformDistribution associated with the
* keystroke triggering the transition. It need not be represented by the
* tokenizationPath to be built.
* @param appliedSuggestionId When defined, the original transition ID
* corresponding to the applied suggestion; it is recorded on each affected
* token's `appliedTransitionId` field.
* @returns A new ContextTokenization reflecting the evaluated transition.
*/
evaluateTransition(
transitionEdge: TransitionEdge,
transitionId: number,
bestProbFromSet: number,
appliedSuggestionId?: number
): ContextTokenization {
const { alignment, inputs } = transitionEdge;
const sliceIndex = alignment.edgeWindow.sliceIndex;
const lexicalModel = this.tail.searchModule.model;

let affectedToken: ContextToken;

const tailTokenization = this.tokens.slice(sliceIndex);

// Assumption: inputs.length > 0. (There is at least one input transform.)
const inputTransformKeys = [...inputs[0].sample.keys()];
const baseTailIndex = (tailTokenization.length - 1);
let removedTokenCount = alignment.removedTokenCount;
while(removedTokenCount-- > 0) {
inputTransformKeys.pop();
tokenization.pop();
tailTokenization.pop();
}

let appliedLength = 0;
for(let i = 0; i < inputTransformKeys.length; i++) {
const tailRelativeIndex = inputTransformKeys[i];
let distribution = inputs.map((i) => ({sample: i.sample.get(tailRelativeIndex), p: i.p}));
const tokenIndex = (tokenization.length - 1) + tailRelativeIndex;
const tokenIndex = baseTailIndex + tailRelativeIndex;

affectedToken = tokenization[tokenIndex];
affectedToken = tailTokenization[tokenIndex];
if(!affectedToken) {
affectedToken = new ContextToken(lexicalModel);
tokenization.push(affectedToken);
tailTokenization.push(affectedToken);
} else if(KMWString.length(affectedToken.exampleInput) == distribution[0].sample.deleteLeft) {
// If the entire token will be replaced, throw out the old one and start anew.
affectedToken = new ContextToken(lexicalModel);
// Replace the token at the affected index with a brand-new token.
tokenization.splice(tokenIndex, 1, affectedToken);
tailTokenization.splice(tokenIndex, 1, affectedToken);
}

affectedToken.isPartial = true;
delete affectedToken.appliedTransitionId;
if(appliedSuggestionId !== undefined) {
affectedToken.appliedTransitionId = appliedSuggestionId;
} else {
delete affectedToken.appliedTransitionId;
}

// If we are completely replacing a token via delete left, erase the deleteLeft;
// that part applied to a _previous_ token that no longer exists.
Expand All @@ -590,7 +623,7 @@ export class ContextTokenization {

const inputSource: PathInputProperties = {
segment: {
transitionId: sourceInput.id,
transitionId,
start: appliedLength
},
bestProbFromSet: bestProbFromSet,
Expand All @@ -601,17 +634,21 @@ export class ContextTokenization {
inputSource.segment.end = appliedLength;
}

affectedToken = new ContextToken(affectedToken);
affectedToken.addInput(inputSource, distribution);

const tokenize = determineModelTokenizer(lexicalModel);
affectedToken.isWhitespace = tokenize({left: affectedToken.exampleInput, startOfBuffer: false, endOfBuffer: false}).left[0]?.isWhitespace ?? false;
// Do not re-use the previous token; the mutation may have unexpected
// results (say, in unit-testing)
tailTokenization[tokenIndex] = affectedToken;

affectedToken = null;
}

return new ContextTokenization(
this.tokens.slice(0, sliceIndex).concat(tokenization),
null /* tokenMapping */,
this.tokens.slice(0, sliceIndex).concat(tailTokenization),
transitionEdge,
determineTaillessTrueKeystroke(transitionEdge)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,6 @@ import Reversion = LexicalModelTypes.Reversion;
import Suggestion = LexicalModelTypes.Suggestion;
import Transform = LexicalModelTypes.Transform;

// Mark affected tokens with the applied-suggestion transition ID
// for easy future reference.
const tagTokens = (state: ContextState, suggestion: Suggestion) => {
const inputs = state.tokenization.transitionEdits.inputs;
const appliedTokenCount = inputs[0].sample.size;
const tokens = state.tokenization.tokens;
for(let i = tokens.length - appliedTokenCount; i < tokens.length; i++) {
tokens[i].appliedTransitionId = suggestion.transformId;
}
}

/**
* Represents the transition between two context states as triggered
* by input keystrokes or applied suggestions.
Expand Down Expand Up @@ -145,15 +134,15 @@ export class ContextTransition {
const buildAppliedTransition = (
transition: ContextTransition,
baseState: ContextState,
transform: Transform
transform: Transform,
appliedTransitionId: number
) => {
const state = baseState.analyzeTransition(
baseState.context,
[{sample: transform, p: 1}],
true
appliedTransitionId
).final;

tagTokens(state, suggestion);
transition._final = state;

// Applying a suggestion should not forget the original suggestion set.
Expand All @@ -166,7 +155,7 @@ export class ContextTransition {
// keystroke data.

const resultTransition = new ContextTransition(this);
buildAppliedTransition(resultTransition, this.base, suggestion.transform);
buildAppliedTransition(resultTransition, this.base, suggestion.transform, suggestion.transformId);

// An applied suggestion should replace the original Transition's effects, though keeping
// the original input around.
Expand All @@ -178,7 +167,7 @@ export class ContextTransition {
}

const finalTransition = new ContextTransition(resultTransition.final, suggestion.appendedTransform.id);
buildAppliedTransition(finalTransition, resultTransition.final, suggestion.appendedTransform);
buildAppliedTransition(finalTransition, resultTransition.final, suggestion.appendedTransform, suggestion.transformId);

// The appended transform is applied with no intermediate input.
finalTransition.final.appliedInput = { insert: '', deleteLeft: 0 };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,14 +346,13 @@ export function determineSuggestionAlignment(
const context = transition.base.context;
const postContext = transition.final.context;
const inputTransform = transition.inputDistribution[0].sample;
const inputTransformMap = transitionEdits?.inputs[0].sample;
let deleteLeft: number;

// If the context now has more tokens, the token we'll be 'predicting' didn't originally exist.
const wordbreak = determineModelWordbreaker(lexicalModel);

// Is the token under construction newly-constructed / is there no pre-existing root?
if(tokenization.taillessTrueKeystroke && inputTransformMap?.has(1)) {
if(tokenization.taillessTrueKeystroke && transitionEdits?.addedNewTokens) {
return {
// If the new token is due to whitespace or due to a different input type
// that would likely imply a tokenization boundary, infer 'new word' mode.
Expand All @@ -366,7 +365,7 @@ export function determineSuggestionAlignment(
deleteLeft: 0
};
// If the tokenized context length is shorter... sounds like a backspace (or similar).
} else if (transitionEdits?.alignment.removedTokenCount > 0) {
} else if (transitionEdits?.removedOldTokens) {
/* Ooh, we've dropped context here. Almost certainly from a backspace or
* similar effect. Even if we drop multiple tokens... well, we know exactly
* how many chars were actually deleted - `inputTransform.deleteLeft`. Since
Expand Down
Loading