From 4f1435c27466c1a381ebd8d6b17dac7e28523406 Mon Sep 17 00:00:00 2001 From: Joshua Horton Date: Thu, 29 Jan 2026 13:51:45 -0600 Subject: [PATCH] change(web): change .split() signature for handling divergent results When SearchQuotientCluster nodes are split, there is no guarantee that the split will be perfectly clean for all paths leading into the cluster. Even if the split is clean for every path, there is also no guarantee that it falls at the same input boundary for all such paths. Consider the following two input sequences: - a, bc, d, e - a, b, c, de Splitting at index 3 yields a clean split for both sequences, but the first sequence splits after the second input, while the second sequence splits after the third. The results cannot be clustered together because they represent different (diverging) intervals of the user's keystroke-input sequence. Splitting at index 4 has its own version of this problem: the first sequence splits cleanly after three inputs, while the second splits in the middle of the fourth input. Again, the represented input intervals diverge, requiring different representations for the split results. 
Build-bot: skip build:web Test-bot: skip --- .../src/main/correction/context-token.ts | 7 +- .../main/correction/legacy-quotient-root.ts | 4 +- .../correction/search-quotient-cluster.ts | 2 +- .../main/correction/search-quotient-node.ts | 7 +- .../main/correction/search-quotient-root.ts | 4 +- .../main/correction/search-quotient-spur.ts | 10 +-- .../search-quotient-spur.tests.ts | 78 +++++++++++++------ 7 files changed, 76 insertions(+), 36 deletions(-) diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index 458ced59d66..2f250f0e7c0 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -212,7 +212,12 @@ export class ContextToken { if(splitSpecs.length == 0 && spec.textOffset == 0) { searchSplits.push(searchSpace); } else { - const splitSpaces = searchSpace.split(spec.textOffset); + // Note: it is conceivable for a token to split into multiple potential + // tokens due to variations in how the text's construction proceeded up + // to the point of the split. + // + // For now, as a stopgap, we simply take the first such split and roll with that. 
+ const splitSpaces = searchSpace.split(spec.textOffset)[0]; searchSplits.push(splitSpaces[1]); searchSpace = splitSpaces[0]; } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts index f0bfbaa5009..c47b47a855b 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts @@ -59,7 +59,7 @@ export class LegacyQuotientRoot extends SearchQuotientRoot { return this.processed.slice(); } - split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] { - return [this, new LegacyQuotientRoot(this.model)]; + split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][] { + return [[this, new LegacyQuotientRoot(this.model)]]; } } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts index 117cd9687d3..d64598e9654 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts @@ -226,7 +226,7 @@ export class SearchQuotientCluster implements SearchQuotientNode { return new SearchQuotientCluster(parentMerges); } - split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] { + split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][] { throw new Error('Method not implemented.'); } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index c88f2f33301..429fe6030fd 100644 --- 
a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -204,9 +204,14 @@ export interface SearchQuotientNode { * Splits this SearchQuotientNode into two halves at the specified codepoint index. * The 'head' component will maximally re-use existing cached data, while the * 'tail' must be reconstructed from scratch due to the new start position. + * + * It is possible that there are multiple distinct ways to split the + * SearchSpace into halves if the split is not consistently clean (between + * input boundaries) for all possible path-sequences modeled by the original + * SearchSpace instance. * @param charIndex */ - split(charIndex: number): [SearchQuotientNode, SearchQuotientNode]; + split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][]; /** * Determines if the SearchQuotientNode is a duplicate of another instance. diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts index 61399815609..90e8e83822f 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts @@ -117,8 +117,8 @@ export class SearchQuotientRoot implements SearchQuotientNode { return true; } - split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] { - return [this, new SearchQuotientRoot(this.model)]; + split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][] { + return [[this, new SearchQuotientRoot(this.model)]]; } merge(space: SearchQuotientNode): SearchQuotientNode { diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts 
index 034aab1ddd6..4225f926592 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -242,19 +242,19 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { } } - public split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] { + public split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][] { const internalSplitIndex = charIndex - (this.codepointLength - this.insertLength); if(internalSplitIndex <= 0 && this.parents[0]) { const parentResults = this.parents[0].split(charIndex); - return [parentResults[0], this.construct(parentResults[1], this.inputs, this.inputSource)]; + return parentResults.map((result) => [result[0], this.construct(result[1], this.inputs, this.inputSource)]); } else if(charIndex >= this.codepointLength) { // this instance = 'first set' // second instance: empty transforms. // // stopgap: maybe go ahead and check each input for any that are longer? // won't matter shortly, though. - return [this, new LegacyQuotientRoot(this.model)]; + return [[this, new LegacyQuotientRoot(this.model)]]; } else { const firstSet: Distribution = this.inputs.map((input) => ({ // keep insert head @@ -289,7 +289,7 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { }) : this.parentNode; // construct two SearchPath instances based on the two sets! 
- return [ + return [[ parent, this.construct(new LegacyQuotientRoot(this.model), secondSet, { ...this.inputSource, @@ -298,7 +298,7 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { start: this.inputSource.segment.start + internalSplitIndex } }) - ]; + ]]; } } diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts index 4238ca0594e..f3a39a93c5a 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts @@ -389,7 +389,9 @@ describe('SearchQuotientSpur', () => { const { paths, distributions } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, splitIndex); assert.equal(tail.inputCount, pathToSplit.inputCount - splitIndex); @@ -419,7 +421,7 @@ describe('SearchQuotientSpur', () => { const { paths, distributions } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head, tail] = pathToSplit.split(0); + const [head, tail] = pathToSplit.split(0)[0]; // The split operation will still reconstruct the token; the head // is always built from the same root path, while the tail is not. 
@@ -436,7 +438,7 @@ describe('SearchQuotientSpur', () => { const { paths } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head] = pathToSplit.split(1); + const [head] = pathToSplit.split(1)[0]; assert.equal(head, pathToSplit.parents[0].parents[0].parents[0]); }); @@ -446,7 +448,7 @@ describe('SearchQuotientSpur', () => { const { paths } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head] = pathToSplit.split(2); + const [head] = pathToSplit.split(2)[0]; assert.equal(head, pathToSplit.parents[0].parents[0]); }); @@ -456,7 +458,7 @@ describe('SearchQuotientSpur', () => { const { paths } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head] = pathToSplit.split(3); + const [head] = pathToSplit.split(3)[0]; assert.equal(head, pathToSplit.parents[0]); }); @@ -466,7 +468,7 @@ describe('SearchQuotientSpur', () => { const { paths } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head] = pathToSplit.split(4); + const [head] = pathToSplit.split(4)[0]; assert.equal(head, pathToSplit); }); @@ -510,7 +512,9 @@ describe('SearchQuotientSpur', () => { const runSplit = (splitIndex: number) => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, Math.ceil(splitIndex/2)); assert.equal(tail.inputCount, Math.ceil(pathToSplit.inputCount - splitIndex/2)); @@ -573,7 +577,7 @@ describe('SearchQuotientSpur', () => { runSplit(0); const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(0); + const [head, tail] = pathToSplit.split(0)[0]; // The split operation will still reconstruct the token; the head // is always built from the same root path, while the tail is not. 
@@ -617,7 +621,7 @@ describe('SearchQuotientSpur', () => { runSplit(8); const { path: pathToSplit } = buildPath(); - const [head, tail] = pathToSplit.split(8); + const [head, tail] = pathToSplit.split(8)[0]; assert.equal(head, pathToSplit.parents[0]); assert.equal((tail as SearchQuotientSpur).inputSource, (pathToSplit as SearchQuotientSpur).inputSource); @@ -627,7 +631,7 @@ describe('SearchQuotientSpur', () => { runSplit(9); const { path: pathToSplit } = buildPath(); - const [head, tail] = pathToSplit.split(9); + const [head, tail] = pathToSplit.split(9)[0]; // Same parent, but not the same final step - it _was_ split, after // all. @@ -647,7 +651,7 @@ describe('SearchQuotientSpur', () => { runSplit(10); const { path: pathToSplit } = buildPath(); - const [head, tail] = pathToSplit.split(10); + const [head, tail] = pathToSplit.split(10)[0]; assert.equal(head, pathToSplit); assert.isTrue(tail instanceof SearchQuotientRoot); @@ -713,7 +717,9 @@ describe('SearchQuotientSpur', () => { const splitIndex = 0; const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, 0); assert.equal(tail.inputCount, 4); @@ -753,7 +759,9 @@ describe('SearchQuotientSpur', () => { const splitIndex = 1; const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // c in the first input, though the 'a' part is deleted later. 
assert.equal(head.inputCount, 1); @@ -796,7 +804,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 2', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(2); + const splitResults = pathToSplit.split(2); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // ce in the second input, though the n is deleted later. assert.equal(head.inputCount, 2); @@ -839,7 +849,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 3', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(3); + const splitResults = pathToSplit.split(3); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // cen => cel in the third input, and there's no adjacent deleteLeft. assert.equal(head.inputCount, 3); @@ -887,7 +899,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 4', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(4); + const splitResults = pathToSplit.split(4); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // cellar in the third input, though the -ar is deleted later. 
assert.equal(head.inputCount, 3); @@ -963,7 +977,9 @@ describe('SearchQuotientSpur', () => { it('splits properly after \'big\'', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(3); + const splitResults = pathToSplit.split(3); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, 1); assert.equal(tail.inputCount, 1); @@ -1004,7 +1020,9 @@ describe('SearchQuotientSpur', () => { it('splits properly after \'biglarge\'', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(8); + const splitResults = pathToSplit.split(8); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, 1); assert.equal(tail.inputCount, 1); @@ -1107,7 +1125,9 @@ describe('SearchQuotientSpur', () => { const splitIndex = 0; const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, 0); assert.equal(tail.inputCount, 4); @@ -1147,7 +1167,9 @@ describe('SearchQuotientSpur', () => { const splitIndex = 1; const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // c in the first input, though the 'a' part is deleted later. 
assert.equal(head.inputCount, 1); @@ -1190,7 +1212,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 2', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(2); + const splitResults = pathToSplit.split(2); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // ce in the second input, though the n is deleted later. assert.equal(head.inputCount, 2); @@ -1233,7 +1257,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 3', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(3); + const splitResults = pathToSplit.split(3); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // cen => cel in the third input, and there's no adjacent deleteLeft. assert.equal(head.inputCount, 3); @@ -1282,7 +1308,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 4', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(4); + const splitResults = pathToSplit.split(4); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // cellar in the third input, though the -ar is deleted later. assert.equal(head.inputCount, 3); @@ -1380,7 +1408,9 @@ describe('SearchQuotientSpur', () => { } ); - const [head, tail] = pathToSplit.split(2); + const splitResults = pathToSplit.split(2); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.deepEqual(head.bestExample, headTarget.bestExample); assert.deepEqual(tail.bestExample, tailTarget.bestExample);