diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts index bbeaa82ed74..a6dd8a5238e 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/distance-modeler.ts @@ -636,7 +636,8 @@ export class SearchResult { * @returns */ export async function *getBestMatches(searchModules: SearchQuotientNode[], timer: ExecutionTimer): AsyncGenerator { - const spaceQueue = new PriorityQueue((a, b) => a.currentCost - b.currentCost); + const comparator = (a: SearchQuotientNode, b: SearchQuotientNode) => a.currentCost - b.currentCost; + let spaceQueue = new PriorityQueue(comparator); // Stage 1 - if we already have extracted results, build a queue just for them // and iterate over it first. @@ -664,6 +665,7 @@ export async function *getBestMatches(searchModules: SearchQuotientNode[], timer let lowestCostSource = spaceQueue.dequeue(); const newResult = lowestCostSource.handleNextNode(); spaceQueue.enqueue(lowestCostSource); + spaceQueue = new PriorityQueue(comparator, spaceQueue.toArray()); if(newResult.type == 'none') { return null; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts index c47b47a855b..f5f30ed841e 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts @@ -43,6 +43,7 @@ export class LegacyQuotientRoot extends SearchQuotientRoot { } this.processed.push(new SearchResult(node)); + this.saveResult(node); return { type: 'complete', cost: node.currentCost, diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts 
b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts index 85fec75639e..703593b8a2d 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-spur.ts @@ -42,7 +42,6 @@ export class LegacyQuotientSpur extends SearchQuotientSpur { const codepointLength = space.codepointLength + insertLength - leftDeleteLength; super(space, inputs, inputSource, codepointLength); - this.queueNodes(this.buildEdgesForNodes(space.previousResults.map(r => r.node))); this.insertLength = insertLength; this.leftDeleteLength = inputSample.deleteLeft; return; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts index 1f63f84b158..5852b358445 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts @@ -10,7 +10,7 @@ import { QueueComparator as Comparator, PriorityQueue } from '@keymanapp/web-utils'; import { LexicalModelTypes } from '@keymanapp/common-types'; -import { SearchNode, SearchResult } from './distance-modeler.js'; +import { SearchNode } from './distance-modeler.js'; import { LegacyQuotientRoot } from './legacy-quotient-root.js'; import { generateSpaceSeed, InputSegment, PathResult, SearchQuotientNode } from './search-quotient-node.js'; import { SearchQuotientSpur } from './search-quotient-spur.js'; @@ -21,7 +21,7 @@ const PATH_QUEUE_COMPARATOR: Comparator = (a, b) => { // The set of search spaces corresponding to the same 'context' for search. // Whenever a wordbreak boundary is crossed, a new instance should be made. 
-export class SearchQuotientCluster implements SearchQuotientNode { +export class SearchQuotientCluster extends SearchQuotientNode { // While most functions can be done directly from SearchSpace, merging and // splitting will need access to SearchQuotientSpur-specific members. It's // also cleaner to not allow nested SearchQuotientClusters while we haven't @@ -29,14 +29,15 @@ export class SearchQuotientCluster implements SearchQuotientNode { private selectionQueue: PriorityQueue = new PriorityQueue(PATH_QUEUE_COMPARATOR); readonly spaceId: number; - // We use an array and not a PriorityQueue b/c batch-heapifying at a single point in time - // is cheaper than iteratively building a priority queue. + // We use an array and not a PriorityQueue b/c batch-heapifying at a single + // point in time is cheaper than iteratively building a priority queue. /** - * This tracks all paths that have reached the end of a viable input-matching path - even - * those of lower cost that produce the same correction as other paths. + * This tracks all paths that have reached the end of a viable input-matching + * path - even those of lower cost that produce the same correction as other + * paths. * - * When new input is received, its entries are then used to append edges to the path in order - * to find potential paths to reach a new viable end. + * When new input is received, its entries are then used to append edges to + * the path in order to find potential paths to reach a new viable end. 
*/ private completedPaths?: SearchNode[] = []; @@ -63,6 +64,8 @@ export class SearchQuotientCluster implements SearchQuotientNode { * @param model */ constructor(inboundPaths: SearchQuotientNode[]) { + super(); + if(inboundPaths.length == 0) { throw new Error("SearchQuotientCluster requires an array with at least one SearchQuotientNode"); } @@ -145,8 +148,10 @@ export class SearchQuotientCluster implements SearchQuotientNode { const bestPath = this.selectionQueue.dequeue(); const currentResult = bestPath.handleNextNode(); this.selectionQueue.enqueue(bestPath); + this.selectionQueue = new PriorityQueue(PATH_QUEUE_COMPARATOR, this.selectionQueue.toArray()); if(currentResult.type == 'complete') { + this.saveResult(currentResult.finalNode); this.completedPaths?.push(currentResult.finalNode); currentResult.spaceId = this.spaceId; } @@ -154,10 +159,6 @@ export class SearchQuotientCluster implements SearchQuotientNode { return currentResult; } - public get previousResults(): SearchResult[] { - return this.completedPaths?.map((n => new SearchResult(n, this.spaceId))) ?? []; - } - get model(): LexicalModelTypes.LexicalModel { return this.parents[0].model; } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index 429fe6030fd..da9253592c8 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -96,23 +96,74 @@ export interface PathInputProperties { * Represents all or a portion of the dynamically-generated graph used to search * for predictive-text corrections. */ -export interface SearchQuotientNode { +export abstract class SearchQuotientNode { + /** + * Holds all `incomingNode` child buffers - buffers to hold nodes processed by + * this SearchCluster but not yet by child SearchSpaces. 
+ */ + private childQueues: SearchNode[][] = []; + + /** + * Marks all results that have already been returned from this instance of SearchPath. + * Should be deleted and cleared if any paths consider this one as a parent. + */ + private returnedValues?: {[resultKey: string]: SearchNode} = {}; + + + // The TS type system prevents this method from being rooted on the instance provided in + // the first parameter, sadly. + /** + * Links the provided queueing buffer to the provided parent node. When the + * parent produces new intermediate results, those results will be made + * available for use in construction of extended paths. + * @param parentNode + * @param childQueue + */ + protected linkAndQueueFromParent(parentNode: SearchQuotientNode, childQueue: SearchNode[]): void { + parentNode.childQueues.push(childQueue); + } + + /** + * Log the results of a processed node and queue it within all subscribed + * processor nodes for construction of deeper search paths. + * @param node + */ + protected saveResult(node: SearchNode): boolean { + const priorMatch = this.returnedValues[node.resultKey]; + if(priorMatch !== undefined && priorMatch.currentCost < node.currentCost) { + return false; + } + + this.returnedValues[node.resultKey] = node; + this.childQueues.forEach((buf) => buf.push(node)); + return true; + } + + /** + * Returns the set of existing, completed search-results with this node's domain. + */ + public get previousResults(): SearchResult[] { + return Object.values(this.returnedValues ?? {}).map(v => new SearchResult(v)); + } + + // -- Everything after this is abstract and implemented by derived child classes. + /** * Returns an identifier uniquely identifying this search-batching structure * by correction-search results. */ - readonly spaceId: number; + abstract get spaceId(): number; /** * The active LexicalModel for use with correction-search. 
*/ - readonly model: LexicalModel; + abstract get model(): LexicalModel; /** * Notes the SearchQuotientNode(s) whose correction-search paths are extended by this * SearchQuotientNode. */ - readonly parents: SearchQuotientNode[]; + abstract get parents(): SearchQuotientNode[]; /** * Retrieves the lowest-cost / lowest-distance edge from the batcher's search @@ -120,20 +171,20 @@ export interface SearchQuotientNode { * what sort of result the edge's destination node represents. * @returns */ - handleNextNode(): PathResult; + abstract handleNextNode(): PathResult; /** * Increases the editing range that will be considered for determining * correction distances. */ - increaseMaxEditDistance(): void; + abstract increaseMaxEditDistance(): void; /** * Reports the cost of the lowest-cost / lowest-distance edge held within the * batcher's search area. * @returns */ - readonly currentCost: number; + abstract get currentCost(): number; /** * Provides a heuristic for the base cost at this path's depth if the best @@ -143,19 +194,14 @@ export interface SearchQuotientNode { * This cost is based on the negative log-likelihood of the probability and * includes the cost from the lowest possible parent nodes visited. */ - readonly lowestPossibleSingleCost: number; - - /** - * Returns the set of previously-processed results under this batcher's domain. - */ - readonly previousResults: SearchResult[]; + abstract readonly lowestPossibleSingleCost: number; /** * When true, this indicates that the currently-represented portion of context * has fat-finger data available, which itself indicates that the user has * corrections enabled. 
*/ - readonly correctionsEnabled: boolean; + abstract readonly correctionsEnabled: boolean; /** * Reports the total number of input keystrokes represented by this @@ -164,32 +210,32 @@ export interface SearchQuotientNode { * (Their fat-finger alternates, when provided, do not influence this count - * they're associated with the original keystroke that affected the context.) */ - readonly inputCount: number; + abstract readonly inputCount: number; /** * Reports the length in codepoints of corrected text represented by completed * paths from this instance. */ - readonly codepointLength: number; + abstract readonly codepointLength: number; /** * Determines the best example text representable by this SearchQuotientNode's * portion of the correction-search graph and its paths. */ - readonly bestExample: { text: string, p: number }; + abstract readonly bestExample: { text: string, p: number }; /** * Gets components representing the keystroke range corrected by this * search-space quotient node. If only part of any keystroke's effects are * used, this will also be noted. */ - readonly inputSegments: InputSegment[]; + abstract readonly inputSegments: InputSegment[]; /** * Gets a compact string-based representation of `inputRange` that * maps compatible token source ranges to each other. */ - get sourceRangeKey(): string; + abstract get sourceRangeKey(): string; /** * Appends this SearchQuotientNode with the provided SearchQuotientNode's search properties, @@ -198,7 +244,7 @@ export interface SearchQuotientNode { * of any split input components will be fully re-merged. * @param space */ - merge(space: SearchQuotientNode): SearchQuotientNode; + abstract merge(space: SearchQuotientNode): SearchQuotientNode; /** * Splits this SearchQuotientNode into two halves at the specified codepoint index. @@ -211,7 +257,7 @@ export interface SearchQuotientNode { * SearchSpace instance. 
* @param charIndex */ - split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][]; + abstract split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][]; /** * Determines if the SearchQuotientNode is a duplicate of another instance. @@ -219,5 +265,5 @@ export interface SearchQuotientNode { * path(s) taken to reach each must be 100% identical. * @param node */ - isSameNode(node: SearchQuotientNode): boolean; + abstract isSameNode(node: SearchQuotientNode): boolean; } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts index 90e8e83822f..8c2d4cee093 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts @@ -1,7 +1,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types'; -import { SearchNode, SearchResult } from './distance-modeler.js'; +import { SearchNode } from './distance-modeler.js'; import { generateSpaceSeed, InputSegment, PathResult, SearchQuotientNode } from './search-quotient-node.js'; import { SearchQuotientSpur } from './search-quotient-spur.js'; @@ -9,10 +9,9 @@ import LexicalModel = LexicalModelTypes.LexicalModel; // The set of search spaces corresponding to the same 'context' for search. // Whenever a wordbreak boundary is crossed, a new instance should be made. 
-export class SearchQuotientRoot implements SearchQuotientNode { +export class SearchQuotientRoot extends SearchQuotientNode { readonly rootNode: SearchNode; readonly model: LexicalModel; - private readonly rootResult: SearchResult; readonly lowestPossibleSingleCost: number = 0; @@ -23,15 +22,15 @@ export class SearchQuotientRoot implements SearchQuotientNode { private hasBeenProcessed: boolean = false; /** - * Constructs a fresh SearchQuotientRoot instance to be used as the root of - * the predictive-text correction / suggestion search process. + * Constructs a fresh SearchSpace instance for used in predictive-text correction + * and suggestion searches. * @param baseSpaceId * @param model */ constructor(model: LexicalModel) { + super(); this.rootNode = new SearchNode(model.traverseFromRoot(), generateSpaceSeed(), t => model.toKey(t)); this.model = model; - this.rootResult = new SearchResult(this.rootNode); } get spaceId(): number { @@ -69,6 +68,7 @@ export class SearchQuotientRoot implements SearchQuotientNode { this.hasBeenProcessed = true; + this.saveResult(this.rootNode); return { type: 'complete', cost: 0, @@ -81,14 +81,6 @@ export class SearchQuotientRoot implements SearchQuotientNode { return this.hasBeenProcessed ? Number.POSITIVE_INFINITY : 0; } - get previousResults(): SearchResult[] { - if(!this.hasBeenProcessed) { - return []; - } else { - return [this.rootResult]; - } - } - // Return a new array each time; avoid aliasing potential! 
get inputSegments(): InputSegment[] { return []; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index d427b24c137..cf09b297615 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -12,7 +12,7 @@ import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keyman import { LexicalModelTypes } from '@keymanapp/common-types'; import { buildMergedTransform } from '@keymanapp/models-templates'; -import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js'; +import { EDIT_DISTANCE_COST_SCALE, SearchNode } from './distance-modeler.js'; import { generateSpaceSeed, InputSegment, PathInputProperties, PathResult, SearchQuotientNode } from './search-quotient-node.js'; import { generateSubsetId } from './tokenization-subsets.js'; import { SearchQuotientRoot } from './search-quotient-root.js'; @@ -29,8 +29,15 @@ export const QUEUE_NODE_COMPARATOR: Comparator = function(arg1, arg2 // The set of search spaces corresponding to the same 'context' for search. // Whenever a wordbreak boundary is crossed, a new instance should be made. -export abstract class SearchQuotientSpur implements SearchQuotientNode { +export abstract class SearchQuotientSpur extends SearchQuotientNode { private selectionQueue: PriorityQueue = new PriorityQueue(QUEUE_NODE_COMPARATOR); + + /** + * Holds all incoming Nodes generated from a parent `SearchSpace` that have not yet been + * extended with this `SearchSpace`'s input. 
+ */ + private incomingNodes: SearchNode[] = []; + readonly inputs?: Distribution; readonly inputSource?: PathInputProperties; @@ -42,12 +49,6 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { public abstract readonly insertLength: number; public abstract readonly leftDeleteLength: number - /** - * Marks all results that have already been returned from this instance of SearchPath. - * Should be deleted and cleared if any paths consider this one as a parent. - */ - private returnedValues?: {[resultKey: string]: SearchNode} = {}; - /** * Provides a heuristic for the base cost at this path's depth if the best * individual input were taken here, regardless of whether or not that's possible. @@ -72,6 +73,7 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { inputSource: PathInputProperties | ProbabilityMass, codepointLength: number ) { + super(); this.spaceId = generateSpaceSeed(); // Coerce inputSource to TokenInputSource format. @@ -99,6 +101,9 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { this.inputs = inputs?.length > 0 ? inputs : null; this.inputCount = parentNode.inputCount + (this.inputs ? 1 : 0); this.codepointLength = codepointLength; + + this.queueNodes(this.buildEdgesForNodes(parentNode.previousResults.map(r => r.node))); + this.linkAndQueueFromParent(parentNode, this.incomingNodes); } public get model(): LexicalModel { @@ -309,6 +314,14 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { } public get currentCost(): number { + if(this.incomingNodes.length > 0) { + this.queueNodes(this.buildEdgesForNodes(this.incomingNodes)); + + // Preserve the array instance, but trash all entries. + // The array is registered with the parent; do not replace! + this.incomingNodes.splice(0, this.incomingNodes.length); + } + const parentCost = this.parentNode?.currentCost ?? Number.POSITIVE_INFINITY; const localCost = this.selectionQueue.peek()?.currentCost ?? 
Number.POSITIVE_INFINITY; @@ -328,10 +341,18 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { * @returns */ public handleNextNode(): PathResult { + if(this.incomingNodes.length > 0) { + this.queueNodes(this.buildEdgesForNodes(this.incomingNodes)); + + // Preserve the array instance, but trash all entries. + // The array is registered with the parent; do not replace! + this.incomingNodes.splice(0, this.incomingNodes.length); + } + const parentCost = this.parentNode?.currentCost ?? Number.POSITIVE_INFINITY; const localCost = this.selectionQueue.peek()?.currentCost ?? Number.POSITIVE_INFINITY; - if(parentCost <= localCost) { + if(parentCost < localCost) { if(parentCost == Number.POSITIVE_INFINITY) { return { type: 'none' @@ -339,6 +360,12 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { } const result = this.parentNode.handleNextNode(); + // The parent will insert the node into our queue. We don't need it, though + // any siblings certainly will. + + // Preserve the array instance, but trash all entries. + // The array is registered with the parent; do not replace! + this.incomingNodes.splice(0, this.incomingNodes.length); if(result.type == 'complete') { this.queueNodes(this.buildEdgesForNodes([result.finalNode])); @@ -395,13 +422,10 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { } if(currentNode.spaceId == this.spaceId) { - if(this.returnedValues) { - if((this.returnedValues[currentNode.resultKey]?.currentCost ?? Number.POSITIVE_INFINITY) > currentNode.currentCost) { - this.returnedValues[currentNode.resultKey] = currentNode; - } else { - // Not a better cost, so reject it and move on to the next potential result. - return this.handleNextNode(); - } + const isUnhandled = this.saveResult(currentNode); + if(!isUnhandled) { + // Not a better cost, so reject it and move on to the next potential result. 
+ return this.handleNextNode(); } return { @@ -416,10 +440,6 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { return unmatchedResult as PathResult; } - public get previousResults(): SearchResult[] { - return Object.values(this.returnedValues ?? {}).map(v => new SearchResult(v)); - } - public get inputSegments(): InputSegment[] { if(!this.parentNode) { return []; diff --git a/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts b/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts index a72fac116bc..6a66776f55a 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/test-index.ts @@ -20,4 +20,4 @@ export { ModelCompositor } from './model-compositor.js'; export * from './predict-helpers.js'; export { default as TransformUtils } from './transformUtils.js' export { default as LMLayerWorker } from './index.js' -export * from './transform-subsets.js'; \ No newline at end of file +export * from './transform-subsets.js'; diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/getBestMatches.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/getBestMatches.tests.ts index b776a3ca45b..7ff0629859c 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/getBestMatches.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/getBestMatches.tests.ts @@ -10,7 +10,7 @@ import { assert } from 'chai'; import { jsonFixture } from '@keymanapp/common-test-resources/model-helpers.mjs'; -import { correction, getBestMatches, LegacyQuotientSpur, models, LegacyQuotientRoot } from '@keymanapp/lm-worker/test-index'; +import { correction, generateSpaceSeed, getBestMatches, LegacyQuotientSpur, models, LegacyQuotientRoot, SearchQuotientCluster } from '@keymanapp/lm-worker/test-index'; import 
SearchResult = correction.SearchResult; import TrieModel = models.TrieModel; @@ -21,178 +21,419 @@ function buildTestTimer() { return new correction.ExecutionTimer(Number.MAX_VALUE, Number.MAX_VALUE); } -describe('getBestMatches', () => { - const checkRepeatableResults_teh = async (iter: AsyncGenerator) => { - const firstIterResult = await iter.next(); // {value: , done: } - assert.isFalse(firstIterResult.done); - - const firstResult: correction.SearchResult = firstIterResult.value; // Retrieves - // No checks on the first set's cost. - assert.equal(firstResult.matchString, "ten"); - - // All start with 'te' but one, and invoke one edit of the same cost. - // 'th' has an 'h' at the same cost (input 3) of the 'e' (input 2). - const secondBatch = [ - 'tec', 'tel', 'tem', - 'ter', 'tes', 'th', - 'te' - ]; - - async function checkBatch(batch: string[], prevCost: number) { - let cost; - while(batch.length > 0) { - const iter_result = await iter.next(); - assert.isFalse(iter_result.done); - - const result = iter_result.value; - assert.isAbove(result.totalCost, prevCost); - if(cost !== undefined) { - assert.equal(result.totalCost, cost); - } else { - cost = result.totalCost; +describe('Correction Searching', () => { + describe('without multi-tokenization; using a single SearchPath sequence', () => { + const checkRepeatableResults_teh = async (iter: AsyncGenerator) => { + const firstIterResult = await iter.next(); // {value: , done: } + assert.isFalse(firstIterResult.done); + + const firstResult: correction.SearchResult = firstIterResult.value; // Retrieves + // No checks on the first set's cost. + assert.equal(firstResult.matchString, "ten"); + + // All start with 'te' but one, and invoke one edit of the same cost. + // 'th' has an 'h' at the same cost (input 3) of the 'e' (input 2). 
+ const secondBatch = [ + 'tec', 'tel', 'tem', + 'ter', 'tes', 'th', + 'te' + ]; + + async function checkBatch(batch: string[], prevCost: number) { + let cost; + while(batch.length > 0) { + const iter_result = await iter.next(); + assert.isFalse(iter_result.done); + + const result = iter_result.value; + assert.isAbove(result.totalCost, prevCost); + if(cost !== undefined) { + assert.equal(result.totalCost, cost); + } else { + cost = result.totalCost; + } + + const matchIndex = batch.findIndex((entry) => entry == result.matchString); + assert.notEqual(matchIndex, -1, `'${result.matchString}' received as prediction too early`); + batch.splice(matchIndex, 1); } - const matchIndex = batch.findIndex((entry) => entry == result.matchString); - assert.notEqual(matchIndex, -1, `'${result.matchString}' received as prediction too early`); - batch.splice(matchIndex, 1); + return cost; } - return cost; + const secondCost = await checkBatch(secondBatch, firstResult.totalCost); + + // Single hard edit, all other input probability aspects are equal + const thirdBatch = [ + // 't' -> 'b' (sub) + 'beh', + // '' -> 'c' (insertion) + 'tech' + ]; + + await checkBatch(thirdBatch, secondCost); + + // All replace the low-likelihood case for the third input. + const fourthBatch = [ + 'the', 'thi', 'tho', 'thr', + 'thu', 'tha' + ]; + + await checkBatch(fourthBatch, secondCost); + + // Replace the _first_ input's char OR insert an extra char, + // also matching the low-likelihood third-char option. + const fifthBatch = [ + 'cen', 'en', 'gen', + 'ken', 'len', 'men', + 'sen', 'then', 'wen' + ]; + + await checkBatch(fifthBatch, secondCost); } - const secondCost = await checkBatch(secondBatch, firstResult.totalCost); - - // Single hard edit, all other input probability aspects are equal - const thirdBatch = [ - // 't' -> 'b' (sub) - 'beh', - // '' -> 'c' (insertion) - 'tech' - ]; - - await checkBatch(thirdBatch, secondCost); - - // All replace the low-likelihood case for the third input. 
- const fourthBatch = [ - 'the', 'thi', 'tho', 'thr', - 'thu', 'tha' - ]; - - await checkBatch(fourthBatch, secondCost); - - // Replace the _first_ input's char OR insert an extra char, - // also matching the low-likelihood third-char option. - const fifthBatch = [ - 'cen', 'en', 'gen', - 'ken', 'len', 'men', - 'sen', 'then', 'wen' - ]; - - await checkBatch(fifthBatch, secondCost); - } - - it('Empty search root, loaded model', async () => { - // The combinatorial effect here is a bit much to fully test. - const rootTraversal = testModel.traverseFromRoot(); - assert.isNotEmpty(rootTraversal); - - const searchSpace = new LegacyQuotientRoot(testModel); - const timer = buildTestTimer(); - const iter = getBestMatches([searchSpace], timer); - - // While there's no input, insertion operations can produce suggestions. - const resultState = await iter.next(); - const result: SearchResult = resultState.value; - - // Just one suggestion root should be returned as the first result. - assert.equal(result.totalCost, 0); // Gives a perfect match - assert.equal(result.matchString, ''); // an empty match string. - assert.isFalse(resultState.done); - - // Should be able to reach more, though. - const laterResultState = await iter.next(); - const laterResult: SearchResult = laterResultState.value; - - // Edit required: an 'insertion' edge (no input matched, but char pulled - // from lexicon) - assert.isOk(laterResult); - assert.isAbove(laterResult.totalCost, 0); - // The most likely word in the lexicon starts with 't'. - assert.equal(laterResult.matchString, 't'); - assert.isFalse(resultState.done); + it('Simple search without input', async () => { + // The combinatorial effect here is a bit much to fully test. 
+ const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const searchSpace = new LegacyQuotientRoot(testModel); + + const iter = getBestMatches([searchSpace], buildTestTimer()); + const firstResult = await iter.next(); + assert.isFalse(firstResult.done); + }); + + // Hmm... how best to update this... + it('Simple search (paralleling "Small integration test")', async () => { + // The combinatorial effect here is a bit much to fully test. + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const searchPath = new LegacyQuotientRoot(testModel); + + // VERY artificial distributions. + const synthInput1 = [ + {sample: {insert: 't', deleteLeft: 0}, p: 1} // Transform, probability + ]; + + const synthInput2 = [ + {sample: {insert: 'e', deleteLeft: 0}, p: 0.75}, // Transform, probability + {sample: {insert: 'h', deleteLeft: 0}, p: 0.25} + ]; + + const synthInput3 = [ + {sample: {insert: 'h', deleteLeft: 0}, p: 0.75}, // Transform, probability + {sample: {insert: 'n', deleteLeft: 0}, p: 0.25} + ]; + + const searchPath1 = new LegacyQuotientSpur(searchPath, synthInput1, synthInput1[0]); + const searchPath2 = new LegacyQuotientSpur(searchPath1, synthInput2, synthInput2[0]); + const searchPath3 = new LegacyQuotientSpur(searchPath2, synthInput3, synthInput3[0]); + + assert.notEqual(searchPath1.spaceId, searchPath.spaceId); + assert.notEqual(searchPath2.spaceId, searchPath1.spaceId); + assert.notEqual(searchPath3.spaceId, searchPath2.spaceId); + + const iter = getBestMatches([searchPath3], buildTestTimer()); // disables the correction-search timeout. + await checkRepeatableResults_teh(iter); + }); + + it('Allows reiteration (sequentially)', async () => { + // The combinatorial effect here is a bit much to fully test. + const rootTraversal = testModel.traverseFromRoot(); + assert.isNotEmpty(rootTraversal); + + const searchPath = new LegacyQuotientRoot(testModel); + + // VERY artificial distributions. 
+ const synthInput1 = [ + {sample: {insert: 't', deleteLeft: 0}, p: 1} // Transform, probability + ]; + + const synthInput2 = [ + {sample: {insert: 'e', deleteLeft: 0}, p: 0.75}, // Transform, probability + {sample: {insert: 'h', deleteLeft: 0}, p: 0.25} + ]; + + const synthInput3 = [ + {sample: {insert: 'h', deleteLeft: 0}, p: 0.75}, // Transform, probability + {sample: {insert: 'n', deleteLeft: 0}, p: 0.25} + ]; + + const searchPath1 = new LegacyQuotientSpur(searchPath, synthInput1, synthInput1[0]); + const searchPath2 = new LegacyQuotientSpur(searchPath1, synthInput2, synthInput2[0]); + const searchPath3 = new LegacyQuotientSpur(searchPath2, synthInput3, synthInput3[0]); + + assert.notEqual(searchPath1.spaceId, searchPath.spaceId); + assert.notEqual(searchPath2.spaceId, searchPath1.spaceId); + assert.notEqual(searchPath3.spaceId, searchPath2.spaceId); + + const iter = getBestMatches([searchPath3], buildTestTimer()); // disables the correction-search timeout. + await checkRepeatableResults_teh(iter); + + // The key: do we get the same results the second time? + // Reset the iterator first... + const iter2 = getBestMatches([searchPath3], buildTestTimer()); // disables the correction-search timeout. + await checkRepeatableResults_teh(iter2); + }); }); - // Hmm... how best to update this... - it('Simple search (paralleling "Small integration test")', async () => { - // The combinatorial effect here is a bit much to fully test. - const rootTraversal = testModel.traverseFromRoot(); - assert.isNotEmpty(rootTraversal); + describe('with divergent SearchSpaces', () => { + const buildPathFixture = () => { + const rootPath = new LegacyQuotientRoot(testModel); + + const distrib_t1 = [ + { sample: { insert: 't', deleteLeft: 0, id: 11 }, p: 1 } + ]; + const tPath = new LegacyQuotientSpur(rootPath, distrib_t1, distrib_t1[0]); + + // Note: this does not reflect the actual intended use pattern for these + // types. It's useful for clear testing, though. 
+ // + // In particular, this test is acting as if the following characters + // wouldn't be part of the same TokenizationPath, yet also using the same + // subsetId, as if they were part of the same TokenizationPath. + const distrib_h2 = [ + { sample: { insert: 'h', deleteLeft: 0, id: 12 }, p: 0.5 } + ]; + const distrib_o2 = [ + { sample: { insert: 'o', deleteLeft: 0, id: 12 }, p: 0.3 } + ]; + const distrib_i2 = [ + { sample: { insert: 'r', deleteLeft: 0, id: 12 }, p: 0.2 } + ]; + + const thPath = new LegacyQuotientSpur(tPath, distrib_h2, distrib_h2[0]); + const toPath = new LegacyQuotientSpur(tPath, distrib_o2, thPath.inputSource); + const trPath = new LegacyQuotientSpur(tPath, distrib_i2, thPath.inputSource); + + const twoCharCluster = new SearchQuotientCluster([thPath, toPath, trPath]); + + const distrib_v3 = [ + { sample: { insert: 'e', deleteLeft: 0, id: 13 }, p: 0.4 }, + { sample: { insert: 'o', deleteLeft: 0, id: 13 }, p: 0.3 }, + { sample: { insert: 'a', deleteLeft: 0, id: 13 }, p: 0.2 }, + { sample: { insert: 'i', deleteLeft: 0, id: 13 }, p: 0.1 } + ]; + + const thvPath = new LegacyQuotientSpur(thPath, distrib_v3, distrib_v3[0]); + const tovPath = new LegacyQuotientSpur(toPath, distrib_v3, thvPath.inputSource); + const trvPath = new LegacyQuotientSpur(trPath, distrib_v3, thvPath.inputSource); + + const clvPath = new LegacyQuotientSpur(twoCharCluster, distrib_v3, thvPath.inputSource); + + const distrib_n4 = [ + { sample: { insert: 'n', deleteLeft: 0, id: 14 }, p: 0.4 }, + { sample: { insert: 'u', deleteLeft: 0, id: 14 }, p: 0.1 } + ]; + const distrib_v3r = [ + { sample: { insert: 'é', deleteLeft: 1, id: 14 }, p: 0.2 }, + { sample: { insert: 'ó', deleteLeft: 1, id: 14 }, p: 0.15 }, + { sample: { insert: 'á', deleteLeft: 1, id: 14 }, p: 0.1 }, + { sample: { insert: 'í', deleteLeft: 1, id: 14 }, p: 0.05 } + ]; + + const thvnPath = new LegacyQuotientSpur(thvPath, distrib_n4, distrib_n4[0]); + const tovnPath = new LegacyQuotientSpur(tovPath, distrib_n4, 
thvnPath.inputSource); + const trvnPath = new LegacyQuotientSpur(trvPath, distrib_n4, thvnPath.inputSource); + const clvnPath = new LegacyQuotientSpur(clvPath, distrib_n4, thvnPath.inputSource); + + const thvrPath = new LegacyQuotientSpur(thvPath, distrib_v3r, {...thvnPath.inputSource, subsetId: generateSpaceSeed()}); + const tovrPath = new LegacyQuotientSpur(tovPath, distrib_v3r, thvrPath.inputSource); + const trvrPath = new LegacyQuotientSpur(trvPath, distrib_v3r, thvrPath.inputSource); + const clvrPath = new LegacyQuotientSpur(clvPath, distrib_v3r, thvrPath.inputSource); + + const paths = { + clusterless: { + thvnPath, tovnPath, trvnPath, thvrPath, tovrPath, trvrPath + }, + clustered: { + clvnPath, clvrPath + } + }; + + const clusterVsPaths = { + paths: { + thPath, trPath, toPath + }, + cluster: twoCharCluster + }; - let searchPath = new LegacyQuotientRoot(testModel); + return {paths, clusterVsPaths}; + } - // VERY artificial distributions. - const synthInput1 = [ - {sample: {insert: 't', deleteLeft: 0}, p: 1} // Transform, probability - ]; + it('correctly searches across multiple paths with common ancestry (clusterless)', async () => { + const paths = buildPathFixture().paths.clusterless; - const synthInput2 = [ - {sample: {insert: 'e', deleteLeft: 0}, p: 0.75}, // Transform, probability - {sample: {insert: 'h', deleteLeft: 0}, p: 0.25} - ]; + const gen_thvn = getBestMatches([paths.thvnPath], buildTestTimer()); + assert.equal(((await gen_thvn.next()).value as SearchResult).matchString, 'then'); - const synthInput3 = [ - {sample: {insert: 'h', deleteLeft: 0}, p: 0.75}, // Transform, probability - {sample: {insert: 'n', deleteLeft: 0}, p: 0.25} - ]; + // Passes through both t and h, then diverges. 
+ const gen_thvr = getBestMatches([paths.thvrPath], buildTestTimer()); + assert.equal(((await gen_thvr.next()).value as SearchResult).matchString, 'the'); - const searchPath1 = new LegacyQuotientSpur(searchPath, synthInput1, synthInput1[0]); - const searchPath2 = new LegacyQuotientSpur(searchPath1, synthInput2, synthInput2[0]); - const searchPath3 = new LegacyQuotientSpur(searchPath2, synthInput3, synthInput3[0]); + // Passes through t, then diverges + const gen_trvn = getBestMatches([paths.trvnPath], buildTestTimer()); + assert.equal(((await gen_trvn.next()).value as SearchResult).matchString, 'trou'); - assert.notEqual(searchPath1.spaceId, searchPath.spaceId); - assert.notEqual(searchPath2.spaceId, searchPath1.spaceId); - assert.notEqual(searchPath3.spaceId, searchPath2.spaceId); + // Passes through t and r, then diverges. + const gen_trvr = getBestMatches([paths.trvrPath], buildTestTimer()); - const iter = getBestMatches([searchPath3], buildTestTimer()); // disables the correction-search timeout. - await checkRepeatableResults_teh(iter); - }); + // Do we get further expected results if we keep querying? + assert.equal(((await gen_trvr.next()).value as SearchResult).matchString, 'tre'); + assert.equal(((await gen_trvr.next()).value as SearchResult).matchString, 'tro'); + assert.equal(((await gen_trvr.next()).value as SearchResult).matchString, 'tra'); + }); - it('Allows reiteration (sequentially)', async () => { - // The combinatorial effect here is a bit much to fully test. - const rootTraversal = testModel.traverseFromRoot(); - assert.isNotEmpty(rootTraversal); + it('correctly searches across multiple paths with common ancestry (clustered)', async () => { + const paths = buildPathFixture().paths.clustered; - let searchPath = new LegacyQuotientRoot(testModel); + const gen_clvn = getBestMatches([paths.clvnPath], buildTestTimer()); + const clvnMatches: string[] = []; - // VERY artificial distributions. 
- const synthInput1 = [
- {sample: {insert: 't', deleteLeft: 0}, p: 1} // Transform, probability
- ];
+ for(let i=0; i < 10; i++) {
+ clvnMatches.push(((await gen_clvn.next()).value as SearchResult).matchString);
+ }
+ assert.includeMembers(clvnMatches, ['then', 'than', 'thin', 'thou', 'trou']);

- const synthInput2 = [
- {sample: {insert: 'e', deleteLeft: 0}, p: 0.75}, // Transform, probability
- {sample: {insert: 'h', deleteLeft: 0}, p: 0.25}
- ];
+ // Passes through both t and h, then diverges.
+ const gen_clvr = getBestMatches([paths.clvrPath], buildTestTimer());
+ const clvrMatches: string[] = [];

- const synthInput3 = [
- {sample: {insert: 'h', deleteLeft: 0}, p: 0.75}, // Transform, probability
- {sample: {insert: 'n', deleteLeft: 0}, p: 0.25}
- ];
+ for(let i=0; i < 10; i++) {
+ clvrMatches.push(((await gen_clvr.next()).value as SearchResult).matchString);
+ }
+ assert.includeMembers(clvrMatches, ['the', 'tho', 'tha', 'tre', 'tro', 'thi']);
+ });
+
+ it('correctly searches across multiple paths when search is unevenly staggered', async () => {
+ const isolatedPaths = buildPathFixture().paths.clustered;

- const searchPath1 = new LegacyQuotientSpur(searchPath, synthInput1, synthInput1[0]);
- const searchPath2 = new LegacyQuotientSpur(searchPath1, synthInput2, synthInput2[0]);
- const searchPath3 = new LegacyQuotientSpur(searchPath2, synthInput3, synthInput3[0]);
+ const gen_clvn1 = getBestMatches([isolatedPaths.clvnPath], buildTestTimer());
+ const isolatedClvnMatches: Set<string> = new Set();

- assert.notEqual(searchPath1.spaceId, searchPath.spaceId);
- assert.notEqual(searchPath2.spaceId, searchPath1.spaceId);
- assert.notEqual(searchPath3.spaceId, searchPath2.spaceId);
+ const SET_COUNT = 3;
+ const COUNT_PER_SET = 4;
+ const TOTAL_COUNT = SET_COUNT * COUNT_PER_SET;

- const iter = getBestMatches([searchPath3], buildTestTimer()); // disables the correction-search timeout.
- await checkRepeatableResults_teh(iter);
+ while(isolatedClvnMatches.size < TOTAL_COUNT) {
+ isolatedClvnMatches.add(((await gen_clvn1.next()).value as SearchResult).matchString);
+ }
+
+ // Passes through both t and h, then diverges.
+ const gen_clvr1 = getBestMatches([isolatedPaths.clvrPath], buildTestTimer());
+ const isolatedClvrMatches: Set<string> = new Set();
+
+ while(isolatedClvrMatches.size < TOTAL_COUNT) {
+ isolatedClvrMatches.add(((await gen_clvr1.next()).value as SearchResult).matchString);
+ }
+
+ // Rebuild anew, and stagger searching four at a time on each, landing on 12 in total per.
+ const paths = buildPathFixture().paths.clustered;
+
+ const gen_clvn2 = getBestMatches([paths.clvnPath], buildTestTimer());
+ const gen_clvr2 = getBestMatches([paths.clvrPath], buildTestTimer());
+
+ const clvnMatches: Set<string> = new Set();
+ const clvrMatches: Set<string> = new Set();
+
+ // Follow the search paths in a staggered manner; this may cause some
+ // results in one to be of higher cost than what's available from the
+ // other.
+ for(let s=0; s < SET_COUNT; s++) {
+ const SET_MAX = (s + 1) * COUNT_PER_SET;
+ while(clvnMatches.size < SET_MAX) {
+ clvnMatches.add(((await gen_clvn2.next()).value as SearchResult).matchString);
+ }
+
+ while(clvrMatches.size < SET_MAX) {
+ clvrMatches.add(((await gen_clvr2.next()).value as SearchResult).matchString);
+ }
+ }
+
+ assert.sameDeepMembers([...clvnMatches], [...isolatedClvnMatches]);
+ assert.sameDeepMembers([...clvrMatches], [...isolatedClvrMatches]);
+ });
+
+ it('returns the same results, in order, from SearchCluster as from constituent SearchPaths', async () => {
+ // See issue #14366 - duplicate results may appear due to a later
+ // right-delete having a lower-cost total than its parent. We use `Set`s
+ // here to avoid duplicate issues and look simply at what results arise
+ // first.
+ //
+ // From the fixture's construction, note `distrib_v3` and `distrib_v3r`.
+ // The "duplicate results" scenario arises when the key selected from + // `distrib_v3` does not match, but is deleted and replaced by a valid key + // from `distrib_v3r`. As the latter is reached later, with lower cost, + // it does get reported again. Resolving #14366 properly should help + // mitigate this issue. + + // --- + + // Build independently; let the cluster own a separate, disconnected copy of the paths. + const {paths: pathTest} = buildPathFixture().clusterVsPaths; + + // Validate that the paths individually return the following match strings. + const gen_th = getBestMatches([pathTest.thPath], buildTestTimer()); + assert.equal(((await gen_th.next()).value as SearchResult).matchString, 'th'); + + const gen_to = getBestMatches([pathTest.toPath], buildTestTimer()); + assert.equal(((await gen_to.next()).value as SearchResult).matchString, 'to'); + + const gen_tr = getBestMatches([pathTest.trPath], buildTestTimer()); + assert.equal(((await gen_tr.next()).value as SearchResult).matchString, 'tr'); + + // And now for the real test. + + const {cluster} = buildPathFixture().clusterVsPaths; + // Build independently; let the cluster own a separate, disconnected copy of the paths. + const {paths} = buildPathFixture().clusterVsPaths; + + const clusterGen = getBestMatches([cluster], buildTestTimer()); + const pathsGen = getBestMatches([...Object.values(paths)], buildTestTimer()); + + const genResults: SearchResult[] = []; + const pathsResults: SearchResult[] = []; + + // Changes to implementation could cause a slight reordering of equal-cost entries. + // Take all entries within a set cost instead. + let baseCost = 0; + while(baseCost < 6) { + const nextFromCluster = (await clusterGen.next()).value as SearchResult; + const nextFromPaths = (await pathsGen.next()).value as SearchResult; + genResults.push(nextFromCluster); + // This one can see duplicates for some prefixes due to some paths having outbound + // paths of lower total cost. 
+ pathsResults.push(nextFromPaths); + + assert.isAtLeast(nextFromCluster.totalCost, baseCost); + assert.isAtLeast(nextFromPaths.totalCost, baseCost); + baseCost = Math.max(baseCost, nextFromCluster.totalCost, nextFromPaths.totalCost); + } - // The key: do we get the same results the second time? - // Reset the iterator first... - const iter2 = getBestMatches([searchPath3], buildTestTimer()); // disables the correction-search timeout. - await checkRepeatableResults_teh(iter2); + assert.deepEqual(genResults.map(r => r.matchString), pathsResults.map(r => r.matchString)); + + // Ensure that all of the clearly-supported prefixes above show up as results. + assert.sameDeepMembers(pathsResults.slice(0, 3).map(r => r.matchString), ['th', 'to', 'tr']); + // These involve likely-enough corrections that should show, given the model fixture. + assert.includeDeepMembers(pathsResults.map(r => r.matchString), [ + 'ty', // 'type' is quite frequent according to the text fixture. + 't', // Deleting the second keystroke outright lands here. + 'oth', // What if we insert an 'o' early on? 'other' is a very common English word + 'ti' // 'time' is pretty common too. + ]); + + // NOTE: this level of corrections does not yet consider the word likelihood - only + // the raw correction cost. No ordering of "likely word" to "unlikely word" should + // occur yet. + + // 'time': weight 934 + // 'type': weight 540 + const timeResult = pathsResults.find(r => r.matchString == 'ti'); + const typeResult = pathsResults.find(r => r.matchString == 'ty'); + // Correction to either should be equally likely. + assert.equal(timeResult.totalCost, typeResult.totalCost); + }); }); });