diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts index 458ced59d66..2f250f0e7c0 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/context-token.ts @@ -212,7 +212,12 @@ export class ContextToken { if(splitSpecs.length == 0 && spec.textOffset == 0) { searchSplits.push(searchSpace); } else { - const splitSpaces = searchSpace.split(spec.textOffset); + // Note: it is conceivable for a token to split into multiple potential + // tokens due to variations in how the text's construction proceeded up + // to the point of the split. + // + // For now, as a stopgap, we simply take the first such split and roll with that. + const splitSpaces = searchSpace.split(spec.textOffset)[0]; searchSplits.push(splitSpaces[1]); searchSpace = splitSpaces[0]; } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts index f0bfbaa5009..c47b47a855b 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/legacy-quotient-root.ts @@ -59,7 +59,7 @@ export class LegacyQuotientRoot extends SearchQuotientRoot { return this.processed.slice(); } - split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] { - return [this, new LegacyQuotientRoot(this.model)]; + split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][] { + return [[this, new LegacyQuotientRoot(this.model)]]; } } \ No newline at end of file diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts index 117cd9687d3..d64598e9654 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-cluster.ts @@ -226,7 +226,7 @@ export class SearchQuotientCluster implements SearchQuotientNode { return new SearchQuotientCluster(parentMerges); } - split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] { + split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][] { throw new Error('Method not implemented.'); } diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts index c88f2f33301..429fe6030fd 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-node.ts @@ -204,9 +204,14 @@ export interface SearchQuotientNode { * Splits this SearchQuotientNode into two halves at the specified codepoint index. * The 'head' component will maximally re-use existing cached data, while the * 'tail' must be reconstructed from scratch due to the new start position. + * + * It is possible that there are multiple distinct ways to split the + * SearchSpace into halves if the split is not consistently clean (between + * input boundaries) for all possible path-sequences modeled by the original + * SearchSpace instance. * @param charIndex */ - split(charIndex: number): [SearchQuotientNode, SearchQuotientNode]; + split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][]; /** * Determines if the SearchQuotientNode is a duplicate of another instance. diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts index 61399815609..90e8e83822f 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-root.ts @@ -117,8 +117,8 @@ export class SearchQuotientRoot implements SearchQuotientNode { return true; } - split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] { - return [this, new SearchQuotientRoot(this.model)]; + split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][] { + return [[this, new SearchQuotientRoot(this.model)]]; } merge(space: SearchQuotientNode): SearchQuotientNode { diff --git a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts index 034aab1ddd6..4225f926592 100644 --- a/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts +++ b/web/src/engine/predictive-text/worker-thread/src/main/correction/search-quotient-spur.ts @@ -242,19 +242,19 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { } } - public split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] { + public split(charIndex: number): [SearchQuotientNode, SearchQuotientNode][] { const internalSplitIndex = charIndex - (this.codepointLength - this.insertLength); if(internalSplitIndex <= 0 && this.parents[0]) { const parentResults = this.parents[0].split(charIndex); - return [parentResults[0], this.construct(parentResults[1], this.inputs, this.inputSource)]; + return parentResults.map((result) => [result[0], this.construct(result[1], this.inputs, this.inputSource)]); } else if(charIndex >= this.codepointLength) { // this instance = 'first set' // second instance: empty transforms. // // stopgap: maybe go ahead and check each input for any that are longer? // won't matter shortly, though. - return [this, new LegacyQuotientRoot(this.model)]; + return [[this, new LegacyQuotientRoot(this.model)]]; } else { const firstSet: Distribution = this.inputs.map((input) => ({ // keep insert head @@ -289,7 +289,7 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { }) : this.parentNode; // construct two SearchPath instances based on the two sets! - return [ + return [[ parent, this.construct(new LegacyQuotientRoot(this.model), secondSet, { ...this.inputSource, @@ -298,7 +298,7 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode { start: this.inputSource.segment.start + internalSplitIndex } }) - ]; + ]]; } } diff --git a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts index 4238ca0594e..f3a39a93c5a 100644 --- a/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts +++ b/web/src/test/auto/headless/engine/predictive-text/worker-thread/correction-search/search-quotient-spur.tests.ts @@ -389,7 +389,9 @@ describe('SearchQuotientSpur', () => { const { paths, distributions } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, splitIndex); assert.equal(tail.inputCount, pathToSplit.inputCount - splitIndex); @@ -419,7 +421,7 @@ describe('SearchQuotientSpur', () => { const { paths, distributions } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head, tail] = pathToSplit.split(0); + const [head, tail] = pathToSplit.split(0)[0]; // The split operation will still reconstruct the token; the head // is always built from the same root path, while the tail is not. @@ -436,7 +438,7 @@ describe('SearchQuotientSpur', () => { const { paths } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head] = pathToSplit.split(1); + const [head] = pathToSplit.split(1)[0]; assert.equal(head, pathToSplit.parents[0].parents[0].parents[0]); }); @@ -446,7 +448,7 @@ describe('SearchQuotientSpur', () => { const { paths } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head] = pathToSplit.split(2); + const [head] = pathToSplit.split(2)[0]; assert.equal(head, pathToSplit.parents[0].parents[0]); }); @@ -456,7 +458,7 @@ describe('SearchQuotientSpur', () => { const { paths } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head] = pathToSplit.split(3); + const [head] = pathToSplit.split(3)[0]; assert.equal(head, pathToSplit.parents[0]); }); @@ -466,7 +468,7 @@ describe('SearchQuotientSpur', () => { const { paths } = buildSimplePathSplitFixture(); const pathToSplit = paths[4]; - const [head] = pathToSplit.split(4); + const [head] = pathToSplit.split(4)[0]; assert.equal(head, pathToSplit); }); @@ -510,7 +512,9 @@ describe('SearchQuotientSpur', () => { const runSplit = (splitIndex: number) => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, Math.ceil(splitIndex/2)); assert.equal(tail.inputCount, Math.ceil(pathToSplit.inputCount - splitIndex/2)); @@ -573,7 +577,7 @@ describe('SearchQuotientSpur', () => { runSplit(0); const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(0); + const [head, tail] = pathToSplit.split(0)[0]; // The split operation will still reconstruct the token; the head // is always built from the same root path, while the tail is not. @@ -617,7 +621,7 @@ describe('SearchQuotientSpur', () => { runSplit(8); const { path: pathToSplit } = buildPath(); - const [head, tail] = pathToSplit.split(8); + const [head, tail] = pathToSplit.split(8)[0]; assert.equal(head, pathToSplit.parents[0]); assert.equal((tail as SearchQuotientSpur).inputSource, (pathToSplit as SearchQuotientSpur).inputSource); @@ -627,7 +631,7 @@ describe('SearchQuotientSpur', () => { runSplit(9); const { path: pathToSplit } = buildPath(); - const [head, tail] = pathToSplit.split(9); + const [head, tail] = pathToSplit.split(9)[0]; // Same parent, but not the same final step - it _was_ split, after // all. @@ -647,7 +651,7 @@ describe('SearchQuotientSpur', () => { runSplit(10); const { path: pathToSplit } = buildPath(); - const [head, tail] = pathToSplit.split(10); + const [head, tail] = pathToSplit.split(10)[0]; assert.equal(head, pathToSplit); assert.isTrue(tail instanceof SearchQuotientRoot); @@ -713,7 +717,9 @@ describe('SearchQuotientSpur', () => { const splitIndex = 0; const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, 0); assert.equal(tail.inputCount, 4); @@ -753,7 +759,9 @@ describe('SearchQuotientSpur', () => { const splitIndex = 1; const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // c in the first input, though the 'a' part is deleted later. assert.equal(head.inputCount, 1); @@ -796,7 +804,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 2', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(2); + const splitResults = pathToSplit.split(2); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // ce in the second input, though the n is deleted later. assert.equal(head.inputCount, 2); @@ -839,7 +849,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 3', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(3); + const splitResults = pathToSplit.split(3); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // cen => cel in the third input, and there's no adjacent deleteLeft. assert.equal(head.inputCount, 3); @@ -887,7 +899,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 4', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(4); + const splitResults = pathToSplit.split(4); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // cellar in the third input, though the -ar is deleted later. assert.equal(head.inputCount, 3); @@ -963,7 +977,9 @@ describe('SearchQuotientSpur', () => { it('splits properly after \'big\'', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(3); + const splitResults = pathToSplit.split(3); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, 1); assert.equal(tail.inputCount, 1); @@ -1004,7 +1020,9 @@ describe('SearchQuotientSpur', () => { it('splits properly after \'biglarge\'', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(8); + const splitResults = pathToSplit.split(8); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, 1); assert.equal(tail.inputCount, 1); @@ -1107,7 +1125,9 @@ describe('SearchQuotientSpur', () => { const splitIndex = 0; const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.equal(head.inputCount, 0); assert.equal(tail.inputCount, 4); @@ -1147,7 +1167,9 @@ describe('SearchQuotientSpur', () => { const splitIndex = 1; const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(splitIndex); + const splitResults = pathToSplit.split(splitIndex); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // c in the first input, though the 'a' part is deleted later. assert.equal(head.inputCount, 1); @@ -1190,7 +1212,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 2', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(2); + const splitResults = pathToSplit.split(2); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // ce in the second input, though the n is deleted later. assert.equal(head.inputCount, 2); @@ -1233,7 +1257,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 3', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(3); + const splitResults = pathToSplit.split(3); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // cen => cel in the third input, and there's no adjacent deleteLeft. assert.equal(head.inputCount, 3); @@ -1282,7 +1308,9 @@ describe('SearchQuotientSpur', () => { it('splits properly at index 4', () => { const { path: pathToSplit, distributions } = buildPath(); - const [head, tail] = pathToSplit.split(4); + const splitResults = pathToSplit.split(4); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; // cellar in the third input, though the -ar is deleted later. assert.equal(head.inputCount, 3); @@ -1380,7 +1408,9 @@ describe('SearchQuotientSpur', () => { } ); - const [head, tail] = pathToSplit.split(2); + const splitResults = pathToSplit.split(2); + assert.equal(splitResults.length, 1); + const [head, tail] = splitResults[0]; assert.deepEqual(head.bestExample, headTarget.bestExample); assert.deepEqual(tail.bestExample, tailTarget.bestExample);