@@ -7,7 +7,6 @@
  * in the context and associated correction-search progress and results.
  */

-import { buildMergedTransform } from "@keymanapp/models-templates";
 import { LexicalModelTypes } from '@keymanapp/common-types';
 import { deepCopy, KMWString } from "@keymanapp/web-utils";

@@ -182,59 +181,19 @@ export class ContextToken {
    * @param lexicalModel
    * @returns
    */
-  static merge(tokensToMerge: ContextToken[], lexicalModel: LexicalModel): ContextToken {
+  static merge(tokensToMerge: ContextToken[]): ContextToken {
     if(tokensToMerge.length < 1) {
       return null;
     }

-    // Assumption: if we're merging a token, it's not whitespace.
-    // Thus, we don't set the .isWhitespace flag field.
-    const resultToken = new ContextToken(lexicalModel);
-
-    let lastSourceInput: PathInputProperties;
-    let lastInputDistrib: Distribution<Transform>;
-    for(const token of tokensToMerge) {
-      const inputCount = token.inputCount;
-      let startIndex = 0;
-
-      if(inputCount == 0) {
-        continue;
-      }
-
-      // Are we re-merging on a previously split transform?
-      if(lastSourceInput?.segment.trueTransform != token.inputSegments[0].segment.trueTransform) {
-        if(lastSourceInput) {
-          resultToken.addInput(lastSourceInput, lastInputDistrib);
-        } // else: there's nothing to add as input
-      } else {
-        // If so, re-merge it!
-        startIndex++;
-
-        lastInputDistrib = lastInputDistrib?.map((entry, index) => {
-          return {
-            sample: buildMergedTransform(entry.sample, token.searchModule.inputSequence[0][index].sample),
-            p: entry.p
-          }
-        });
-
-        // In case there's only one input that needs merging on both ends.
-        if(inputCount == 1) {
-          // There's potential that the next incoming token needs to merge with this.
-          continue;
-        } else {
-          resultToken.addInput(lastSourceInput, lastInputDistrib);
-        }
-      }
-      lastSourceInput = null;
-      lastInputDistrib = null;
-
-      // Ignore the last entry for now - it may need to merge with a matching
-      // entry in the next token!
-      for(let i = startIndex; i < inputCount - 1; i++) {
-        resultToken.addInput(token.inputSegments[i], token.searchModule.inputSequence[i]);
-      }
-      lastSourceInput = token.inputSegments[inputCount-1];
-      lastInputDistrib = token.searchModule.inputSequence[inputCount-1];
+    const resultToken = new ContextToken(tokensToMerge.shift());
+    while(tokensToMerge.length > 0) {
+      const next = tokensToMerge.shift();
+      resultToken._searchModule = resultToken._searchModule.merge(next._searchModule);
     }

-    resultToken.addInput(lastSourceInput, lastInputDistrib);
-
     return resultToken;
   }

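The rewritten `merge` above delegates all input bookkeeping to `SearchQuotientNode.merge`, but note that it drains `tokensToMerge` via `shift()`, mutating the caller's array. A minimal usage sketch in TypeScript; the `plainModel` fixture name is borrowed from the unit tests further down, and the import path is illustrative:

import { LexicalModelTypes } from '@keymanapp/common-types';
import { ContextToken } from './context-token.js'; // illustrative path

declare const plainModel: LexicalModelTypes.LexicalModel; // any model fixture

const tokens = [
  new ContextToken(plainModel, "can"),
  new ContextToken(plainModel, "'"),
  new ContextToken(plainModel, "t")
];

// merge() consumes the array it receives; pass a copy when the caller
// still needs the original list afterward.
const merged = ContextToken.merge([...tokens]);
console.assert(merged.exampleInput == "can't");
console.assert(tokens.length == 3); // left intact by the spread-copy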

@@ -526,7 +526,7 @@ export class ContextTokenization {
       // consider: move to ContextToken as class method. (static?)
       const merge = merges.shift();
       const tokensToMerge = merge.inputs.map((m) => baseTokenization[m.index]);
-      const mergeResult = ContextToken.merge(tokensToMerge, lexicalModel);
+      const mergeResult = ContextToken.merge(tokensToMerge);
       tokenization.push(mergeResult);
       i = merge.inputs[merge.inputs.length - 1].index;
       continue;

@@ -44,7 +44,7 @@ export class LegacyQuotientSpur extends SearchQuotientSpur {
     return;
   }

-  protected construct(parentNode: SearchQuotientNode, inputs?: Distribution<Transform>, inputSource?: PathInputProperties): this {
+  construct(parentNode: SearchQuotientNode, inputs?: Distribution<Transform>, inputSource?: PathInputProperties): this {
     return new LegacyQuotientSpur(parentNode, inputs, inputSource) as this;
   }

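Dropping `protected` here matters because `merge()` implementations on *other* nodes now call `construct()` to rebuild an edge of the same concrete type on a new parent. A standalone sketch of that polymorphic-factory pattern, with illustrative names only (not the engine's real types):

// Each subclass rebuilds its own concrete type; the `as this` cast mirrors
// the one used by LegacyQuotientSpur above.
abstract class Edge {
  constructor(readonly parent: Edge | null) {}

  abstract construct(parent: Edge | null): this;

  // Recreate this edge, preserving its concrete type, on a new parent.
  rebuildOn(parent: Edge): this {
    return this.construct(parent);
  }
}

class PlainEdge extends Edge {
  construct(parent: Edge | null): this {
    return new PlainEdge(parent) as this;
  }
}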

@@ -3,7 +3,7 @@
  *
  * Created by jahorton on 2025-10-09
  *
- * This file defines the predictive-text engine's SearchSpace class, which is used to
+ * This file defines the predictive-text engine's SearchQuotientNode class, which is used to
  * manage the search-space(s) for text corrections within the engine.
  */


@@ -179,7 +179,7 @@ export interface SearchQuotientNode {
   readonly inputCount: number;

   /**
-   * Retrieves the sequence of inputs that led to this SearchSpace.
+   * Retrieves the sequence of inputs that led to this SearchQuotientNode.
    *
    * THIS WILL BE REMOVED SHORTLY in favor of `constituentPaths` below, which
    * provides an improved view into the data and models multiple paths to the

@@ -215,7 +215,16 @@
   get sourceRangeKey(): string;

   /**
-   * Splits this SearchSpace into two halves at the specified codepoint index.
+   * Appends the provided SearchQuotientNode's search properties to this one,
+   * extending the represented search range accordingly. If this operation
+   * represents merging the result of a previous .split() call, the two halves
+   * of any split input components will be fully re-merged.
+   * @param space
+   */
+  merge(space: SearchQuotientNode): SearchQuotientNode;
+
+  /**
+   * Splits this SearchQuotientNode into two halves at the specified codepoint index.
    * The 'head' component will maximally re-use existing cached data, while the
    * 'tail' must be reconstructed from scratch due to the new start position.
    * @param charIndex
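The new `merge()` member above is documented as the inverse of `split()`. A sketch of the intended round-trip; treating `sourceRangeKey` as identifying the covered source range is an assumption based on its name, not an asserted API guarantee:

import { SearchQuotientNode } from './search-quotient-node.js'; // illustrative path

declare const node: SearchQuotientNode;

// Split at a codepoint index, then glue the halves back together.
const [head, tail] = node.split(2);
const remerged = head.merge(tail);

// Expected invariant: any input component split at index 2 is fully
// re-merged, so the result covers node's original source range.
console.assert(remerged.sourceRangeKey == node.sourceRangeKey);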

@@ -3,6 +3,7 @@ import { LexicalModelTypes } from '@keymanapp/common-types';

 import { SearchNode, SearchResult } from './distance-modeler.js';
 import { generateSpaceSeed, PathInputProperties, PathResult, SearchQuotientNode } from './search-quotient-node.js';
+import { SearchQuotientSpur } from './search-quotient-spur.js';

 import LexicalModel = LexicalModelTypes.LexicalModel;

Expand Down Expand Up @@ -105,4 +106,31 @@ export class SearchQuotientRoot implements SearchQuotientNode {
split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] {
return [this, new SearchQuotientRoot(this.model)];
}

merge(space: SearchQuotientNode): SearchQuotientNode {
// Head node for the incoming path is empty, so skip it.
if(space.parents.length == 0 || space instanceof SearchQuotientRoot) {
return this;
}

// Merge any parents first as a baseline. We have to come after their
// affects are merged in, anyway.
const parentMerges = space.parents?.length > 0 ? space.parents.map((p) => this.merge(p)) : [this];

// if parentMerges.length > 0, is a SearchCluster.
// const parentMerge = parentMerges.length > 0 ? new SearchCluster(parentMerges) : parentMerges[0];
const parentMerge = parentMerges[0];

// Special case: if we've reached the head of the space to be merged, check
// for a split transform.
// - we return `this` from the root, so if that's what we received, we're
// on the first descendant - the first path component.
if(space instanceof SearchQuotientSpur) {
// Needs to construct a NEW version of whatever the same type is, on this root.
return space.construct(parentMerge, space.inputs, space.inputSource);
} else {
// If the parent was a cluster, the cluster itself is the merge.
return parentMerge;
}
}
}
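Both this root-level `merge()` and the spur-level version below share one recursive shape: recurse until the incoming path's own root is reached (it contributes nothing, so the recursion returns `this`), then rebuild each spur of the incoming path, in order, on top of the previous merge result. For a single-parent chain this is equivalent to the sketch below, which assumes `inputs`/`inputSource` are externally readable, as the implementation's own cross-class usage suggests:

import { SearchQuotientRoot } from './search-quotient-root.js'; // illustrative paths
import { SearchQuotientSpur } from './search-quotient-spur.js';

declare const root1: SearchQuotientRoot; // merge target: `this`
declare const spurA: SearchQuotientSpur; // parent: its own root (root2)
declare const spurB: SearchQuotientSpur; // parent: spurA

// root1.merge(spurB) unwinds to root2 (skipped as empty), then
// reconstructs each spur on the merged side:
const rebuiltA = spurA.construct(root1, spurA.inputs, spurA.inputSource);
const rebuiltB = spurB.construct(rebuiltA, spurB.inputs, spurB.inputSource);
// rebuiltB is the merge result (split-transform re-merging aside).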

@@ -10,10 +10,12 @@

 import { QueueComparator as Comparator, KMWString, PriorityQueue } from '@keymanapp/web-utils';
 import { LexicalModelTypes } from '@keymanapp/common-types';
+import { buildMergedTransform } from '@keymanapp/models-templates';

 import { EDIT_DISTANCE_COST_SCALE, SearchNode, SearchResult } from './distance-modeler.js';
 import { generateSpaceSeed, PathResult, SearchQuotientNode, PathInputProperties } from './search-quotient-node.js';
 import { generateSubsetId } from './tokenization-subsets.js';
+import { SearchQuotientRoot } from './search-quotient-root.js';
 import { LegacyQuotientRoot } from './legacy-quotient-root.js';

 import Distribution = LexicalModelTypes.Distribution;

@@ -155,13 +157,102 @@ export abstract class SearchQuotientSpur implements SearchQuotientNode {
     this.selectionQueue = new PriorityQueue<SearchNode>(QUEUE_NODE_COMPARATOR, entries);
   }

-  /** Allows the base class to construct instances of the derived class. */
-  protected abstract construct(
+  /**
+   * Allows construction of new spur instances matching this spur's edge type.
+   *
+   * Target use cases:
+   * - `SearchQuotientNode.split()`
+   *   - an edge may need to be split into two parts
+   *   - edges may need to be recreated on a shortened search path (for the
+   *     split's right-hand side)
+   * - `SearchQuotientNode.merge()`
+   *   - two parts may need to be recombined into a single edge
+   *   - edges from the 'right-hand side' may need to be recreated on the
+   *     left-hand side for the merged quotient path
+   * @param parentNode
+   * @param inputs
+   * @param inputSource
+   */
+  abstract construct(
     parentNode: SearchQuotientNode,
-    inputs?: Distribution<Transform>,
-    inputSource?: PathInputProperties
+    inputs: Distribution<Transform>,
+    inputSource: PathInputProperties
   ): this;

+  // Spaces are in sequence here.
+  // `this` = the head 'space'.
+  public merge(space: SearchQuotientNode): SearchQuotientNode {
+    // Head node for the incoming path is empty, so skip it.
+    if(space.parents.length == 0 || space instanceof SearchQuotientRoot) {
+      return this;
+    }
+
+    // Merge any parents first as a baseline. We have to come after their
+    // effects are merged in, anyway.
+    const parentMerges = space.parents?.length > 0 ? space.parents.map((p) => this.merge(p)) : [this];
+
+    // If parentMerges.length > 1, this should become a SearchCluster.
+    const parentMerge = parentMerges[0];
+
+    // Special case: if we've reached the head of the space to be merged, check
+    // for a split transform.
+    // - we return `this` from the root, so if that's what we received, we're
+    //   on the first descendant - the first path component.
+    if(space instanceof SearchQuotientSpur) {
+      if(parentMerge != this) {
+        // Here, we reconstruct the child `space` on a new root. The new
+        // instance needs to be of the same type as the original instance.
+        return space.construct(parentMerge, space.inputs, space.inputSource);
+      }
+
+      const localInputId = this.inputSource?.segment.transitionId;
+      const spaceInputId = space.inputSource?.segment.transitionId;
+      // The 'id' may be undefined in some unit tests and for tokens
+      // reconstructed after a backspace. In either case, we consider the
+      // related results as fully separate; our reconstructions are
+      // per-codepoint.
+      if(localInputId != spaceInputId || localInputId === undefined) {
+        return space.construct(parentMerge, space.inputs, space.inputSource);
+      }
+
+      // Get the twin halves that were split.
+      // Assumption: the two halves are in their original order, etc.
+      const localInputs = this.inputs;
+      const spaceInputs = space.inputs;
+
+      // Sanity check - ensure that the input distributions have the same length;
+      // if not, this shouldn't represent a SearchPath split!
+      if(localInputs.length != spaceInputs.length) {
+        return space.construct(parentMerge, space.inputs, space.inputSource);
+      }
+
+      // Merge them!
+      const mergedInputs = localInputs?.map((entry, index) => {
+        return {
+          sample: buildMergedTransform(entry.sample, spaceInputs[index].sample),
+          p: entry.p
+        }
+      });
+
+      const mergedInputSource = {
+        ...this.inputSource,
+        segment: {
+          ...this.inputSource.segment,
+          end: space.inputSource.segment.end
+        }
+      };
+
+      if(mergedInputSource.segment.end == undefined) {
+        delete mergedInputSource.segment.end;
+      }
+
+      // Now to re-merge the two halves.
+      return space.construct(this.parentNode, mergedInputs, mergedInputSource);
+    } else {
+      // If the parent was a cluster, the cluster itself is the merge.
+      return parentMerge;
+    }
+  }
+
   public split(charIndex: number): [SearchQuotientNode, SearchQuotientNode] {
     const internalSplitIndex = charIndex - (this.codepointLength - this.insertLength);

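The core of the split-transform re-merge above is the element-wise `buildMergedTransform` pass over the two halves' input distributions. A small self-contained sketch of that step, assuming the helper concatenates the halves of a previously split Transform (which is how the code above uses it):

import { LexicalModelTypes } from '@keymanapp/common-types';
import { buildMergedTransform } from '@keymanapp/models-templates';

import Transform = LexicalModelTypes.Transform;
import Distribution = LexicalModelTypes.Distribution;

// Two halves of an input that was previously split mid-token.
const left: Distribution<Transform> = [{ sample: { insert: 'ca', deleteLeft: 0 }, p: 1 }];
const right: Distribution<Transform> = [{ sample: { insert: 'n', deleteLeft: 0 }, p: 1 }];

// Mirrors the mergedInputs construction in SearchQuotientSpur.merge():
// samples merge pairwise; probabilities come from the left-hand half.
const mergedInputs = left.map((entry, index) => ({
  sample: buildMergedTransform(entry.sample, right[index].sample),
  p: entry.p
}));
// Expected: mergedInputs[0].sample.insert == 'can'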

@@ -107,7 +107,7 @@ describe('ContextToken', function() {
     const token2 = new ContextToken(plainModel, "'");
     const token3 = new ContextToken(plainModel, "t");

-    const merged = ContextToken.merge([token1, token2, token3], plainModel);
+    const merged = ContextToken.merge([token1, token2, token3]);
     assert.equal(merged.exampleInput, "can't");
     token1.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1));
     token2.inputSegments.forEach((entry) => assert.isTrue(merged.inputSegments.indexOf(entry) > -1));

@@ -161,7 +161,7 @@ describe('ContextToken', function() {
       subsetId: srcSubsetId
     }, [{sample: {insert: 't', deleteLeft: 0, deleteRight: 0, id: 1}, p: 1}]);

-    const merged = ContextToken.merge([token1, token2, token3], plainModel);
+    const merged = ContextToken.merge([token1, token2, token3]);
     assert.equal(merged.exampleInput, "can't");
     assert.deepEqual(merged.inputSegments, [ {
       segment: {

@@ -259,7 +259,7 @@ describe('ContextToken', function() {
       subsetId: srcSubsetIds[3]
     }, [{sample: srcTransforms[3], p: 1}]);

-    const merged = ContextToken.merge(tokensToMerge, plainModel);
+    const merged = ContextToken.merge(tokensToMerge);
     assert.equal(merged.exampleInput, "applesandsourgrapes");
     assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({
       segment: {

@@ -359,7 +359,7 @@ describe('ContextToken', function() {
       subsetId: srcSubsetIds[3]
     }, [{sample: srcTransforms[3], p: 1}]);

-    const merged = ContextToken.merge(tokensToMerge, plainModel);
+    const merged = ContextToken.merge(tokensToMerge);
     assert.equal(merged.exampleInput, toMathematicalSMP("applesandsourgrapes"));
     assert.deepEqual(merged.inputSegments, srcTransforms.map((t, i) => ({
       segment: {