From af219def62342355e71642547af5e60237d8ea38 Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Thu, 4 Aug 2022 14:38:48 +0700 Subject: [PATCH 1/2] fix(web): enhanced timer for prediction alg --- .../src/correction/distance-modeler.ts | 78 +++++++++++++++++-- 1 file changed, 73 insertions(+), 5 deletions(-) diff --git a/common/web/lm-worker/src/correction/distance-modeler.ts b/common/web/lm-worker/src/correction/distance-modeler.ts index 7b4456e4d8c..d4c13ff78f3 100644 --- a/common/web/lm-worker/src/correction/distance-modeler.ts +++ b/common/web/lm-worker/src/correction/distance-modeler.ts @@ -526,8 +526,6 @@ namespace correction { let searchSpace = this; let currentReturns: {[mapKey: string]: SearchNode} = {}; - // JS measures time by the number of milliseconds since Jan 1, 1970. - let timeStart = Date.now(); let maxTime: number; if(waitMillis == 0) { maxTime = Infinity; @@ -537,6 +535,70 @@ namespace correction { maxTime = waitMillis; } + class ExecutionTimer { + private start: number; + private loopStart: number; + private maxExecutionTime: number; + private maxTrueTime: number; + + private executionTime: number; + + /** + * Used to track intervals in which potential context swaps by the OS may + * have occurred. + */ + private largestIntervals: number[] = [0]; + private iterationCount: number = 0; + + constructor(maxExecutionTime: number, maxTrueTime: number) { + // JS measures time by the number of milliseconds since Jan 1, 1970. 
+ this.loopStart = this.start = Date.now(); + this.maxExecutionTime = maxExecutionTime; + this.maxTrueTime = maxTrueTime; + } + + startLoop() { + this.loopStart = Date.now(); + } + + markIteration() { + const now = Date.now(); + const delta = now - this.loopStart; + this.executionTime += delta; + this.iterationCount++; + + if(delta) { + if(this.largestIntervals.length > 2 && delta > this.largestIntervals[0]) { + this.largestIntervals[0] = delta; + } else { + this.largestIntervals.push(delta); + } + this.largestIntervals.sort(); + } + } + + shouldTimeout(): boolean { + const now = Date.now(); + if(this.start - now > this.maxTrueTime) { + return true; + } + + // Look at the 'intervals' for a possible over-large outliers. + if(this.largestIntervals.length > 2) { + if(this.largestIntervals[2] > 2 * (this.largestIntervals[0] + this.largestIntervals[1])) { + this.executionTime -= this.largestIntervals[2]; + this.largestIntervals.pop(); + } + } + + return this.executionTime > this.maxExecutionTime; + } + + resetOutlierCheck() { + this.largestIntervals = []; + } + } + class BatchingAssistant { currentCost = Number.MIN_SAFE_INTEGER; entries: SearchResult[] = []; @@ -582,17 +644,22 @@ namespace correction { let batcher = new BatchingAssistant(); + const timer = new ExecutionTimer(maxTime*3, maxTime); + // Stage 1 - if we already have extracted results, build a queue just for them and iterate over it first. let returnedValues = Object.values(this.returnedValues); if(returnedValues.length > 0) { let preprocessedQueue = new models.PriorityQueue(QUEUE_NODE_COMPARATOR, returnedValues); // Build batches of same-cost entries. + timer.startLoop(); while(preprocessedQueue.count > 0) { let entry = preprocessedQueue.dequeue(); let batch = batcher.checkAndAdd(entry); + timer.markIteration(); if(batch) { + // Do not track yielded time. yield batch; } } @@ -601,11 +668,13 @@ namespace correction { // finalize the last preprocessed group without issue. 
+ let batch = batcher.tryFinalize(); if(batch) { + // Do not track yielded time. yield batch; } } // Stage 2: the fun part; actually searching! + timer.startLoop(); let timedOut = false; do { let newResult: PathResult; @@ -613,10 +682,9 @@ namespace correction { // Search for a 'complete' path, skipping all partial paths as long as time remains. do { newResult = this.handleNextNode(); + timer.markIteration(); - // (Naive) timeout check! - let now = Date.now(); - if(now - timeStart > maxTime) { + if(timer.shouldTimeout()) { timedOut = true; } } while(!timedOut && newResult.type == 'intermediate') From 814a4922770802b7e2acb7173fb82391b7786987 Mon Sep 17 00:00:00 2001 From: "Joshua A. Horton" Date: Tue, 9 Aug 2022 09:00:29 +0700 Subject: [PATCH 2/2] chore(web): tweaks, docs for execution-timer inner-class --- .../src/correction/distance-modeler.ts | 89 ++++++++++++++++--- 1 file changed, 78 insertions(+), 11 deletions(-) diff --git a/common/web/lm-worker/src/correction/distance-modeler.ts b/common/web/lm-worker/src/correction/distance-modeler.ts index d4c13ff78f3..b0cd2179a92 100644 --- a/common/web/lm-worker/src/correction/distance-modeler.ts +++ b/common/web/lm-worker/src/correction/distance-modeler.ts @@ -535,9 +535,25 @@ namespace correction { maxTime = waitMillis; } + /** + * This inner class is designed to help the algorithm detect its active execution time. + * While there's no official JS way to do this, we can approximate it by polling the + * current system time (in ms) after each iteration of a short-duration loop. Unusual + * spikes in system time for a single iteration are likely to indicate that an OS + * context switch occurred at some point during the iteration's execution. + */ class ExecutionTimer { + /** + * The system time when this instance was created. + */ private start: number; + + /** + * Marks the system time at the start of the currently-running loop, as noted + * by a call to the `startLoop` function. 
+ */ private loopStart: number; + private maxExecutionTime: number; private maxTrueTime: number; @@ -545,10 +561,14 @@ namespace correction { /** * Used to track intervals in which potential context swaps by the OS may - * have occurred. + * have occurred. Context switches generally seem to pause threads for + * at least 16 ms, while we expect each loop iteration to complete + * within just 1 ms. So, any possible context switch should have the + * longest observed change in system time. + * + * See `updateOutliers` for more details. */ private largestIntervals: number[] = [0]; - private iterationCount: number = 0; constructor(maxExecutionTime: number, maxTrueTime: number) { // JS measures time by the number of milliseconds since Jan 1, 1970. @@ -565,31 +585,77 @@ namespace correction { const now = Date.now(); const delta = now - this.loopStart; this.executionTime += delta; - this.iterationCount++; + /** + * Update the list of the three longest system-time intervals observed + * for execution of a single loop iteration. + * + * Ignore any zero-ms length intervals; they'd make the logic much + * messier than necessary otherwise. + */ if(delta) { + // If the currently-observed interval is longer than the shortest of the 3 + // previously-observed longest intervals, replace it. if(this.largestIntervals.length > 2 && delta > this.largestIntervals[0]) { this.largestIntervals[0] = delta; } else { this.largestIntervals.push(delta); } + // Puts the list in ascending order. Shortest of the list becomes the head, + // longest one the tail. this.largestIntervals.sort(); - } - } - shouldTimeout(): boolean { - const now = Date.now(); - if(this.start - now > this.maxTrueTime) { - return true; + // Then, determine if we need to update our outlier-based tweaks. + this.updateOutliers(); } + } - // Look at the 'intervals' for a possible over-large outliers. 
+ updateOutliers() { + /* Base assumption: since each loop of the search should evaluate within ~1ms, + * notably longer execution times are probably context switches. + * + * Base assumption: OS context switches generally last at least 16ms. (Based on + * a window.setTimeout() usually not evaluating for at least + * that long, even if set to 1ms.) + * + * To mitigate these assumptions: we'll track the execution time of every loop + * iteration. If the longest observation somehow matches or exceeds the length of + * the next two almost-longest observations twice over... we have a very strong + * 'context switch' candidate. + * + * Or, in near-formal math/stats: we expect a very low variance in execution + * time among the iterations of the search's loops. With a very low variance, + * ANY significant proportional spikes in execution time are outliers - outliers + * likely caused by an OS context switch. + * + * Rather than do intensive math, we use a somewhat lazy approach below that + * achieves the same net results given our assumptions, even when relaxed somewhat. + * + * The logic below relaxes the base assumptions a bit to be safe: + * - [2ms, 2ms, 8ms] will cause 8ms to be seen as an outlier. + * - [2ms, 3ms, 10ms] will cause 10ms to be seen as an outlier. + * + * Ideally: + * - [1ms, 1ms, 4ms] will view 4ms as an outlier. + * + * So we can safely handle slightly longer average intervals and slightly shorter + * OS context-switch time intervals. + */ if(this.largestIntervals.length > 2) { - if(this.largestIntervals[2] > 2 * (this.largestIntervals[0] + this.largestIntervals[1])) { + // Precondition: the `largestIntervals` array is sorted in ascending order. + // Shortest entry is at the head, longest at the tail. 
+ if(this.largestIntervals[2] >= 2 * (this.largestIntervals[0] + this.largestIntervals[1])) { this.executionTime -= this.largestIntervals[2]; this.largestIntervals.pop(); } } + } + + shouldTimeout(): boolean { + const now = Date.now(); + if(this.start - now > this.maxTrueTime) { + return true; + } return this.executionTime > this.maxExecutionTime; } @@ -674,6 +740,7 @@ namespace correction { } // Stage 2: the fun part; actually searching! + timer.resetOutlierCheck(); timer.startLoop(); let timedOut = false; do {