keymanapp · jahorton · Oct 20, 2025 · Oct 2, 2025 · Oct 2, 2025 · Oct 16, 2025
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts
@@ -262,30 +262,8 @@ export class ContextState {
     const nonEmptyTail = !tokens[lastIndex].isEmptyToken ? tokens[lastIndex] : tokens[lastIndex - 1];
     const appliedSuggestionTransitionId = nonEmptyTail?.appliedTransitionId;
 
-    const postContext = transformDistribution?.[0] ? applyTransform(transformDistribution[0].sample, context) : context;
-
-    // Note for future:  the next line's pattern asserts that there is only one true tokenization.
-    // We may eventually allow for multiple potential tokenizations (per epic-dict-breaker)
-    const tokenizedContext = determineModelTokenizer(lexicalModel)(postContext).left;
-    if(tokenizedContext.length == 0) {
-      tokenizedContext.push({text: ''});
-    }
-    // In which case we could try need to align for each of them, starting from the most likely.
-
-    // If we're not at the start of the buffer, we're probably a sliding context.
-    const isSliding = !this.context.startOfBuffer;
-
-    // It's possible the tokenization will remember more of the initial token than is
-    // actually present in the sliding context window, which imposes a need for a wide-band
-    // computeDistance 'radius' in the called function.
-    const alignmentResults = this.tokenization.computeAlignment(tokenizedContext.map((token) => token.text), isSliding, isApplyingSuggestion);
-
-    // Stopgap:  add tokenized transformSequenceDistribution to the alignment data & use that
-    // where noted:  tagTokens() in context-transition.ts, `determineSuggestionAlignment()`.
-
-
     const state = new ContextState(applyTransform(trueInput, context), lexicalModel);
-    state.tokenization =  new ContextTokenization(resultTokenization.tokens, alignmentResults, resultTokenization.taillessTrueKeystroke);
+    state.tokenization =  new ContextTokenization(resultTokenization.tokens, tokenizationAnalysis, resultTokenization.taillessTrueKeystroke);
     state.appliedInput = transformDistribution?.[0].sample;
     transition.finalize(state, transformDistribution, resultTokenization.taillessTrueKeystroke);
     transition.revertableTransitionId = appliedSuggestionTransitionId;

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-tokenization.ts
@@ -12,7 +12,6 @@ import { KMWString } from '@keymanapp/web-utils';
 
 import { ContextToken } from './context-token.js';
 import TransformUtils from '../transformUtils.js';
-import { computeAlignment, ContextStateAlignment } from './alignment-helpers.js';
 import { computeDistance, EditOperation, EditTuple } from './classical-calculation.js';
 import { determineModelTokenizer } from '../model-helpers.js';
 import { ExtendedEditOperation, SegmentableDistanceCalculation } from './segmentable-calculation.js';
@@ -105,12 +104,11 @@ export class ContextTokenization {
    * The sequence of tokens in the context represented by this instance.
    */
   readonly tokens: ContextToken[];
-
   /**
    * The tokenization-transition metadata relating this instance to the most likely
    * tokenization from a prior state.
    */
-  readonly alignment?: ContextStateAlignment;
+  readonly transitionEdits?: PendingTokenization;
 
   /**
    * The portion of edits from the true input keystroke that are not part of the
@@ -125,21 +123,21 @@ export class ContextTokenization {
 
   constructor(priorToClone: ContextTokenization);
   constructor(tokens: ContextToken[]);
-  constructor(tokens: ContextToken[], alignment: ContextStateAlignment, taillessTrueKeystroke: Transform);
+  constructor(tokens: ContextToken[], alignment: PendingTokenization, taillessTrueKeystroke: Transform);
   constructor(
     param1: ContextToken[] | ContextTokenization,
-    alignment?: ContextStateAlignment,
+    alignment?: PendingTokenization,
     taillessTrueKeystroke?: Transform
   ) {
     if(!(param1 instanceof ContextTokenization)) {
       const tokens = param1;
       this.tokens = [].concat(tokens);
-      this.alignment = alignment;
+      this.transitionEdits = alignment;
       this.taillessTrueKeystroke = taillessTrueKeystroke;
     } else {
       const priorToClone = param1;
       this.tokens = priorToClone.tokens.map((entry) => new ContextToken(entry));
-      this.alignment = {...priorToClone.alignment};
+      this.transitionEdits = priorToClone.transitionEdits && {...priorToClone.transitionEdits}; // stay undefined, not {}, so `?.` guards hold
       this.taillessTrueKeystroke = priorToClone.taillessTrueKeystroke;
     }
   }
@@ -169,20 +167,6 @@ export class ContextTokenization {
     return this.tokens.map(token => token.exampleInput);
   }
 
-  /**
-   * Determines the alignment between a new, incoming tokenization source and the
-   * tokenization modeled by the current instance.
-   * @param incomingTokenization Raw strings corresponding to the tokenization of the incoming context
-   * @param isSliding Notes if the context window is full (and sliding-alignment is particularly needed)
-   * @param noSubVerify When true, this disables inspection of 'substitute' transitions that avoids
-   * wholesale replacement of the original token.
-   * @returns Alignment data that details if and how the incoming tokenization aligns with
-   * the tokenization modeled by this instance.
-   */
-  computeAlignment(incomingTokenization: string[], isSliding: boolean, noSubVerify?: boolean): ContextStateAlignment {
-    return computeAlignment(this.exampleInput, incomingTokenization, isSliding, noSubVerify);
-  }
-
   /**
    * Applies the specified Transform to the _left-hand_ side of the context in
    * order to update and match the current contents of the sliding context

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-transition.ts
@@ -16,12 +16,11 @@ import Reversion = LexicalModelTypes.Reversion;
 import Suggestion = LexicalModelTypes.Suggestion;
 import Transform = LexicalModelTypes.Transform;
 
-
 // Mark affected tokens with the applied-suggestion transition ID
 // for easy future reference.
 const tagTokens = (state: ContextState, suggestion: Suggestion) => {
-  const alignment = state.tokenization.alignment
-  const appliedTokenCount = (alignment.canAlign && true) && (alignment.tailEditLength + Math.max(alignment.tailTokenShift, 0));
+  const inputs = state.tokenization.transitionEdits.inputs;
+  const appliedTokenCount = inputs[0].sample.size;
   const tokens = state.tokenization.tokens;
   for(let i = tokens.length - appliedTokenCount; i < tokens.length; i++) {
     tokens[i].appliedTransitionId = suggestion.transformId;

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -342,17 +342,18 @@ export function determineSuggestionAlignment(
    */
   deleteLeft: number
 } {
-  const alignment = transition.final.tokenization.alignment;
+  const transitionEdits = transition.final.tokenization.transitionEdits;
   const context = transition.base.context;
   const postContext = transition.final.context;
   const inputTransform = transition.inputDistribution[0].sample;
+  const inputTransformMap = transitionEdits?.inputs[0].sample;
   let deleteLeft: number;
 
   // If the context now has more tokens, the token we'll be 'predicting' didn't originally exist.
   const wordbreak = determineModelWordbreaker(lexicalModel);
 
   // Is the token under construction newly-constructed / is there no pre-existing root?
-  if(transition.preservationTransform && alignment?.canAlign && alignment.tailTokenShift > 0) {
+  if(transition.preservationTransform && inputTransformMap?.has(1)) {
     return {
       // If the new token is due to whitespace or due to a different input type
       // that would likely imply a tokenization boundary, infer 'new word' mode.
@@ -365,7 +366,7 @@ export function determineSuggestionAlignment(
       deleteLeft: 0
     };
     // If the tokenized context length is shorter... sounds like a backspace (or similar).
-  } else if (alignment?.canAlign && alignment.tailTokenShift < 0) {
+  } else if (transitionEdits?.alignment.removedTokenCount > 0) {
     /* Ooh, we've dropped context here.  Almost certainly from a backspace or
      * similar effect.  Even if we drop multiple tokens... well, we know exactly
      * how many chars were actually deleted - `inputTransform.deleteLeft`. Since