From 2cadedef6dc86ebf44f9d17088cf2c481e8f2aa5 Mon Sep 17 00:00:00 2001
From: Joshua Horton <joshua_horton@sil.org>
Date: Thu, 2 Oct 2025 16:29:03 -0500
Subject: [PATCH] fix(web): fix base context-state validation

After changes added in #14880, one of our unit tests failed.  After lots of investigation, it turns out that the `isSubstitutionAlignable` does not catch certain cases that `determineContextSlideTransform` does.  The latter does need some improvement to work as a fix, but a small change lets it pick up the slack and replace the former.

As it's also used during context-state transition, re-using the same method in both places gives us far more confidence that we won't have an error the second time around.

Build-bot: skip build:web
Test-bot: skip
---
 .../src/main/correction/context-state.ts      | 10 +++-
 .../worker-thread/src/main/predict-helpers.ts | 18 ++-----
 ...ine-suggestion-context-transition.tests.ts |  4 +-
 .../worker-model-compositor.tests.ts          | 47 +++++++++++++++++--
 4 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts
index a068a45ac5b..60f0103aae6 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-state.ts
@@ -319,7 +319,13 @@ export function determineContextSlideTransform(srcContext: Context, dstContext:
   // Assertion: the current (sliding) context window is alignable.
   // See `matchBaseContextState` in ../predict-helpers.ts.
   if(srcContext.startOfBuffer && dstContext.startOfBuffer) {
-    return { insert: '', deleteLeft: 0, deleteRight: 0 };
+    // Validate that they actually match.
+    // If not, the contexts shouldn't equal.
+    if(srcContext.left == dstContext.left) {
+      return { insert: '', deleteLeft: 0, deleteRight: 0 };
+    } else {
+      return null;
+    }
   }
 
   // Assertion:  the right-hand side of the left-context strings WILL match.
@@ -338,7 +344,7 @@ export function determineContextSlideTransform(srcContext: Context, dstContext:
 
   // Validation:  does the part of both strings that should match actually match?
   if(rawDelta > 0 ? dst.slice(rawDelta) != src : src.slice(-rawDelta) != dst) {
-    throw new Error("Invalid base context");
+    return null;
   }
 
   return {
diff --git a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
index 2252549b164..174d73ad585 100644
--- a/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
+++ b/web/src/engine/predictive-text/worker-thread/src/main/predict-helpers.ts
@@ -5,9 +5,8 @@ import { defaultWordbreaker, WordBreakProperty } from '@keymanapp/models-wordbre
 
 import TransformUtils from './transformUtils.js';
 import { determineModelTokenizer, determineModelWordbreaker, determinePunctuationFromModel } from './model-helpers.js';
-import { isSubstitutionAlignable } from './correction/alignment-helpers.js';
 import { ContextTracker } from './correction/context-tracker.js';
-import { ContextState } from './correction/context-state.js';
+import { ContextState, determineContextSlideTransform } from './correction/context-state.js';
 import { ExecutionTimer } from './correction/execution-timer.js';
 import ModelCompositor from './model-compositor.js';
 
@@ -240,17 +239,6 @@ export function matchBaseContextState(
   context: Context,
   transitionId: number
 ): ContextState {
-  const contextsMatch = (a: Context, b: Context) => {
-    // If either context's window is equal to or greater than the threshold
-    // length, there's a good chance something slid into or out of range.
-    if(!a.startOfBuffer || !b.startOfBuffer) {
-      return isSubstitutionAlignable(a.left, b.left);
-    } else {
-      // If both are smaller than the threshold, the text should match exactly.
-      return a.left == b.left;
-    }
-  };
-
   const lastTransition = contextTracker.latest;
   let contextState: ContextState;
 
@@ -259,9 +247,9 @@ export function matchBaseContextState(
   // delete (if lengthened).  No substitutions possible, as no rules will have
   // occurred - the ONLY change is the amount of known text vs the context
   // window's range.
-  if(contextsMatch(lastTransition.final.context, context)) {
+  if(determineContextSlideTransform(lastTransition.final.context, context)) {
     contextState = lastTransition.final;
-  } else if(contextsMatch(lastTransition.base.context, context)) {
+  } else if(determineContextSlideTransform(lastTransition.base.context, context)) {
     // Multitap case:  if we reverted back to the original underlying context,
     // rather than using the previous output context.
     //
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts
index c2e18858f7f..50958ea756c 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/prediction-helpers/determine-suggestion-context-transition.tests.ts
@@ -96,7 +96,7 @@ describe('determineContextTransition', () => {
       const transition = determineContextTransition(
         tracker,
         tracker.latest.base,
-        targetContext,
+        baseContext,
         inputDistribution
       );
 
@@ -105,6 +105,8 @@ describe('determineContextTransition', () => {
       assert.isFalse(warningEmitterSpy.called);
       assert.sameOrderedMembers(transition.final.tokenization.exampleInput, ['this', ' ', 'is', ' ', 'for', ' ', 'techn']);
       assert.isOk(transition.final.tokenization.alignment);
+      assert.equal(transition.final.context.left, targetContext.left);
+      assert.equal(transition.final.context.right ?? "", targetContext.right ?? "");
       assert.sameDeepOrderedMembers(transition.inputDistribution, inputDistribution);
       assert.isNotOk(transition.preservationTransform);
       assert.equal(transition.transitionId, 1);
diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts
index 7181dbd4bfd..928b6e75c47 100644
--- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts
+++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/worker-model-compositor.tests.ts
@@ -41,14 +41,16 @@ describe('ModelCompositor', function() {
         assert.isEmpty(suggestions1.filter((entry) => entry.tag != 'keep'));
         assert.isOk(suggestions1.find((entry) => entry.tag == 'keep'));
 
+        const context2 = {
+          left: "the apl", startOfBuffer: true, endOfBuffer: true
+        };
+        compositor.resetContext(context2, 2);
+        // Context + insert needs to match a word in the model; there is no matching word here.
+        // Invalid due to the existing context root.
         const suggestions2 = await compositor.predict({
           insert: "l",
           deleteLeft: 0
-        }, {
-          // Context + insert needs to match a word in the model; there is no matching word here.
-          // Invalid due to the existing context root.
-          left: "the apl", startOfBuffer: true, endOfBuffer: true
-        });
+        }, context2);
 
         assert.isEmpty(suggestions2.filter((entry) => entry.tag != 'keep'));
         assert.isOk(suggestions2.find((entry) => entry.tag == 'keep'));
@@ -71,6 +73,41 @@ describe('ModelCompositor', function() {
         assert.isOk(suggestions.find((entry) => entry.displayAs == 'applied'));
       });
 
+      it('generates suggestions from an empty context', async function() {
+        let compositor = new ModelCompositor(plainModel, true);
+        let context = {
+          left: '', startOfBuffer: true, endOfBuffer: true,
+        };
+
+        let inputTransform = {
+          insert: '',
+          deleteLeft: 0
+        };
+
+        let suggestions = await compositor.predict(inputTransform, context);
+        suggestions.forEach(function(suggestion) {
+          // Suggstions are built based on the context state BEFORE the triggering
+          // input, replacing the prediction's root with the complete word.
+          //
+          // This is necessary, in part, for proper display-string construction.
+          assert.equal(suggestion.transform.deleteLeft, 0);
+        });
+
+        let keep = suggestions.find(function(suggestion) {
+          return suggestion.tag == 'keep';
+        });
+
+        assert.isUndefined(keep);
+
+        // Expect an appended space.
+        let expectedEntries = ['the', 'and', 'of', 'it'];
+        expectedEntries.forEach(function(entry) {
+          assert.isDefined(suggestions.find(function(suggestion) {
+            return suggestion.transform.insert == entry && suggestion.appendedTransform?.insert == ' ';
+          }));
+        });
+      });
+
       it('generates suggestions with expected properties', async function() {
         let compositor = new ModelCompositor(plainModel, true);
         let context = {