From af219def62342355e71642547af5e60237d8ea38 Mon Sep 17 00:00:00 2001
From: "Joshua A. Horton" <joshua_horton@sil.org>
Date: Thu, 4 Aug 2022 14:38:48 +0700
Subject: [PATCH 1/2] fix(web): enhanced timer for prediction alg

---
 .../src/correction/distance-modeler.ts        | 78 +++++++++++++++++--
 1 file changed, 73 insertions(+), 5 deletions(-)
diff --git a/common/web/lm-worker/src/correction/distance-modeler.ts b/common/web/lm-worker/src/correction/distance-modeler.ts
index 7b4456e4d8c..d4c13ff78f3 100644
--- a/common/web/lm-worker/src/correction/distance-modeler.ts
+++ b/common/web/lm-worker/src/correction/distance-modeler.ts
@@ -526,8 +526,6 @@ namespace correction {
       let searchSpace = this;
       let currentReturns: {[mapKey: string]: SearchNode} = {};
 
-      // JS measures time by the number of milliseconds since Jan 1, 1970.
-      let timeStart = Date.now();
       let maxTime: number;
       if(waitMillis == 0) {
         maxTime = Infinity;
@@ -537,6 +535,70 @@ namespace correction {
         maxTime = waitMillis;
       }
 
+      class ExecutionTimer {
+        private start: number;
+        private loopStart: number;
+        private maxExecutionTime: number;
+        private maxTrueTime: number;
+
+        private executionTime: number = 0;
+
+        /**
+         * Used to track intervals in which potential context swaps by the OS may
+         * have occurred.
+         */
+        private largestIntervals: number[] = [0];
+        private iterationCount: number = 0;
+
+        constructor(maxExecutionTime: number, maxTrueTime: number) {
+          // JS measures time by the number of milliseconds since Jan 1, 1970.
+          this.loopStart = this.start = Date.now();
+          this.maxExecutionTime = maxExecutionTime;
+          this.maxTrueTime = maxTrueTime;
+        }
+
+        startLoop() {
+          this.loopStart = Date.now();
+        }
+
+        markIteration() {
+          const now = Date.now();
+          const delta = now - this.loopStart;
+          this.executionTime += delta;
+          this.iterationCount++;
+
+          if(delta) {
+            if(this.largestIntervals.length > 2 && delta > this.largestIntervals[0]) {
+              this.largestIntervals[0] = delta;
+            } else {
+              this.largestIntervals.push(delta);
+            }
+            this.largestIntervals.sort();
+          }
+        }
+
+        shouldTimeout(): boolean {
+          const now = Date.now();
+          if(this.start - now > this.maxTrueTime) {
+            return true;
+          }
+
+          // Look at the 'intervals' for a possible over-large outliers.
+          if(this.largestIntervals.length > 2) {
+            if(this.largestIntervals[2] > 2 * (this.largestIntervals[0] + this.largestIntervals[1])) {
+              this.executionTime -= this.largestIntervals[2];
+              this.largestIntervals.pop();
+            }
+          }
+
+          return this.executionTime > this.maxExecutionTime;
+        }
+
+        resetOutlierCheck() {
+          this.largestIntervals = [];
+        }
+      }
+
       class BatchingAssistant {
         currentCost = Number.MIN_SAFE_INTEGER;
         entries: SearchResult[] = [];
@@ -582,17 +644,22 @@ namespace correction {
 
       let batcher = new BatchingAssistant();
 
+      const timer = new ExecutionTimer(maxTime*3, maxTime);
+
       // Stage 1 - if we already have extracted results, build a queue just for them and iterate over it first.
       let returnedValues = Object.values(this.returnedValues);
       if(returnedValues.length > 0) {
         let preprocessedQueue = new models.PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR, returnedValues);
 
         // Build batches of same-cost entries.
+        timer.startLoop();
         while(preprocessedQueue.count > 0) {
           let entry = preprocessedQueue.dequeue();
           let batch = batcher.checkAndAdd(entry);
+          timer.markIteration();
 
           if(batch) {
+            // Do not track yielded time.
             yield batch;
           }
         }
@@ -601,11 +668,13 @@ namespace correction {
         // finalize the last preprocessed group without issue.
         let batch = batcher.tryFinalize();
         if(batch) {
+          // Do not track yielded time.
           yield batch;
         }
       }
 
       // Stage 2:  the fun part; actually searching!
+      timer.startLoop();
       let timedOut = false;
       do {
         let newResult: PathResult;
@@ -613,10 +682,9 @@ namespace correction {
         // Search for a 'complete' path, skipping all partial paths as long as time remains.
         do {
           newResult = this.handleNextNode();
+          timer.markIteration();
 
-          // (Naive) timeout check!
-          let now = Date.now();
-          if(now - timeStart > maxTime) {
+          if(timer.shouldTimeout()) {
             timedOut = true;
           }
         } while(!timedOut && newResult.type == 'intermediate')

From 814a4922770802b7e2acb7173fb82391b7786987 Mon Sep 17 00:00:00 2001
From: "Joshua A. Horton" <joshua_horton@sil.org>
Date: Tue, 9 Aug 2022 09:00:29 +0700
Subject: [PATCH 2/2] chore(web): tweaks, docs for execution-timer inner-class

---
 .../src/correction/distance-modeler.ts        | 89 ++++++++++++++++---
 1 file changed, 78 insertions(+), 11 deletions(-)

diff --git a/common/web/lm-worker/src/correction/distance-modeler.ts b/common/web/lm-worker/src/correction/distance-modeler.ts
index d4c13ff78f3..b0cd2179a92 100644
--- a/common/web/lm-worker/src/correction/distance-modeler.ts
+++ b/common/web/lm-worker/src/correction/distance-modeler.ts
@@ -535,9 +535,25 @@ namespace correction {
         maxTime = waitMillis;
       }
 
+      /**
+       * This inner class is designed to help the algorithm detect its active execution time.
+       * While there's no official JS way to do this, we can approximate it by polling the
+       * current system time (in ms) after each iteration of a short-duration loop.  Unusual
+       * spikes in system time for a single iteration are likely to indicate that an OS
+       * context switch occurred at some point during the iteration's execution.
+       */
       class ExecutionTimer {
+        /**
+         * The system time when this instance was created.
+         */
         private start: number;
+
+        /**
+         * Marks the system time at the start of the currently-running loop, as noted
+         * by a call to the `startLoop` function.
+         */
         private loopStart: number;
+
         private maxExecutionTime: number;
         private maxTrueTime: number;
 
@@ -545,10 +561,14 @@ namespace correction {
 
         /**
          * Used to track intervals in which potential context swaps by the OS may
-         * have occurred.
+         * have occurred.  Context switches generally seem to pause threads for
+         * at least 16 ms, while we expect each loop iteration to complete
+         * within just 1 ms.  So, any possible context switch should have the
+         * longest observed change in system time.
+         *
+         * See `updateOutliers` for more details.
          */
         private largestIntervals: number[] = [0];
-        private iterationCount: number = 0;
 
         constructor(maxExecutionTime: number, maxTrueTime: number) {
           // JS measures time by the number of milliseconds since Jan 1, 1970.
@@ -565,31 +585,77 @@ namespace correction {
           const now = Date.now();
           const delta = now - this.loopStart;
           this.executionTime += delta;
-          this.iterationCount++;
 
+          /**
+           * Update the list of the three longest system-time intervals observed
+           * for execution of a single loop iteration.
+           *
+           * Ignore any zero-ms length intervals; they'd make the logic much
+           * messier than necessary otherwise.
+           */
           if(delta) {
+            // Time the next iteration from this mark; when delta is 0, loopStart already equals now.
+            this.loopStart = now;
+            // Track only the 3 longest intervals:  once full, a longer one replaces the shortest.
-            if(this.largestIntervals.length > 2 && delta > this.largestIntervals[0]) {
-              this.largestIntervals[0] = delta;
-            } else {
-              this.largestIntervals.push(delta);
-            }
+            if(this.largestIntervals.length < 3) {
+              this.largestIntervals.push(delta);
+            } else if(delta > this.largestIntervals[0]) {
+              this.largestIntervals[0] = delta;
+            }
+            // Sort numerically (the default sort compares strings):  shortest first, longest last.
-            this.largestIntervals.sort();
+            this.largestIntervals.sort((a, b) => a - b);
-          }
-        }
 
-        shouldTimeout(): boolean {
-          const now = Date.now();
-          if(this.start - now > this.maxTrueTime) {
-            return true;
+            // Then, determine if we need to update our outlier-based tweaks.
+            this.updateOutliers();
           }
+        }
 
-          // Look at the 'intervals' for a possible over-large outliers.
+        updateOutliers() {
+          /* Base assumption:  since each loop of the search should evaluate within ~1ms,
+           *                   notably longer execution times are probably context switches.
+           *
+           * Base assumption:  OS context switches generally last at least 16ms.  (Based on
+           *                   a window.setTimeout() usually not evaluating for at least
+           *                   that long, even if set to 1ms.)
+           *
+           * To mitigate these assumptions:  we'll track the execution time of every loop
+           * iteration.  If the longest observation somehow matches or exceeds the length of
+           * the next two almost-longest observations twice over... we have a very strong
+           * 'context switch' candidate.
+           *
+           * Or, in near-formal math/stats:  we expect a very low variance in execution
+           * time among the iterations of the search's loops.  With a very low variance,
+           * ANY significant proportional spikes in execution time are outliers - outliers
+           * likely caused by an OS context switch.
+           *
+           * Rather than do intensive math, we use a somewhat lazy approach below that
+           * achieves the same net results given our assumptions, even when relaxed somewhat.
+           *
+           * The logic below relaxes the base assumptions a bit to be safe:
+           * - [2ms, 2ms, 8ms]  will cause 8ms to be seen as an outlier.
+           * - [2ms, 3ms, 10ms] will cause 10ms to be seen as an outlier.
+           *
+           * Ideally:
+           * - [1ms, 1ms, 4ms] will view 4ms as an outlier.
+           *
+           * So we can safely handle slightly longer average intervals and slightly shorter
+           * OS context-switch time intervals.
+           */
           if(this.largestIntervals.length > 2) {
-            if(this.largestIntervals[2] > 2 * (this.largestIntervals[0] + this.largestIntervals[1])) {
+            // Precondition:  the `largestIntervals` array is sorted in ascending order.
+            // Shortest entry is at the head, longest at the tail.
+            if(this.largestIntervals[2] >= 2 * (this.largestIntervals[0] + this.largestIntervals[1])) {
               this.executionTime -= this.largestIntervals[2];
               this.largestIntervals.pop();
             }
           }
+        }
+
+        shouldTimeout(): boolean {
+          const now = Date.now();
+          if(now - this.start > this.maxTrueTime) {  // Wall-clock ms elapsed since construction.
+            return true;
+          }
 
           return this.executionTime > this.maxExecutionTime;
         }
@@ -674,6 +740,7 @@ namespace correction {
       }
 
       // Stage 2:  the fun part; actually searching!
+      timer.resetOutlierCheck();
       timer.startLoop();
       let timedOut = false;
       do {