Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,15 @@ export class ContextState {
*
* May also contain a single entry for applying Suggestions or when correction behavior
* is disabled.
* @param isApplyingSuggestion When true, alters behavior to better model application of suggestions.
* @param appliedSuggestionId When defined, notes the original transition ID corresponding to
* the applied suggestion.
* @returns
*/
analyzeTransition(
context: Context,
transformDistribution: Distribution<Transform>,
// overrides checks for token substitution that can fail for large applied suggestions.
isApplyingSuggestion?: boolean
appliedSuggestionId?: number
): ContextTransition {
const lexicalModel = this.model;

Expand Down Expand Up @@ -249,7 +250,8 @@ export class ContextState {
// into subsets.
const bestProb = transformDistribution.reduce((best, curr) => Math.max(best, curr.p), 0);
// Should gain one per subsetBuilder.subsets entry.
const resultTokenization = baseTokenization.evaluateTransition(tokenizationAnalysis, lexicalModel, trueInput, bestProb);
const realignedTokenization = baseTokenization.realign(tokenizationAnalysis.alignment);
const resultTokenization = realignedTokenization.evaluateTransition(tokenizationAnalysis, trueInput.id, bestProb, appliedSuggestionId);

// ------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,15 @@ export class ContextTokenization {
* The sequence of tokens in the context represented by this instance.
*/
readonly tokens: ContextToken[];

/**
* The tokenization-transition metadata relating this instance to the most likely
* tokenization from a prior state.
* Denotes whether or not the transition to this tokenization added or deleted
* any tokens.
*/
readonly transitionEdits?: TransitionEdge;
readonly transitionEdits?: {
addedNewTokens: boolean,
removedOldTokens: boolean
};

/**
* The portion of edits from the true input keystroke that are not part of the
Expand All @@ -129,13 +133,18 @@ export class ContextTokenization {
constructor(tokens: ContextToken[], alignment: TransitionEdge, taillessTrueKeystroke: Transform);
constructor(
param1: ContextToken[] | ContextTokenization,
alignment?: TransitionEdge,
tokenizationPath?: TransitionEdge,
taillessTrueKeystroke?: Transform
) {
if(!(param1 instanceof ContextTokenization)) {
const tokens = param1;
this.tokens = [].concat(tokens);
this.transitionEdits = alignment;
if(tokenizationPath) {
this.transitionEdits = {
addedNewTokens: tokenizationPath?.inputs[0].sample.has(1) ?? false,
removedOldTokens: (tokenizationPath?.alignment.removedTokenCount ?? 0) > 0
}
}
this.taillessTrueKeystroke = taillessTrueKeystroke;
} else {
const priorToClone = param1;
Expand Down Expand Up @@ -489,30 +498,16 @@ export class ContextTokenization {

/**
* Given results from `precomputeTokenizationAfterInput`, this method will
* evaluate the pending transition in tokenization for all associated inputs
* realign this tokenization's range to match the incoming keystroke's context window
* while reusing as many correction-search intermediate results as possible.
* @param transitionEdge Batched results from one or more
* @param alignment The shared alignment metadata produced by one or more
* `precomputeTokenizationAfterInput` calls on this instance, all with the
* same alignment values.
* @param lexicalModel The active lexical model
* @param sourceInput The Transform associated with the keystroke triggering
* the transition.
* @param bestProbFromSet The probability of the single most likely input
* transform in the overall transformDistribution associated with the
* keystroke triggering the transition. It need not be represented by the
* TransitionEdge to be built.
* @returns
*/
evaluateTransition(
transitionEdge: TransitionEdge,
lexicalModel: LexicalModel,
sourceInput: Transform,
bestProbFromSet: number
): ContextTokenization {
const { alignment: alignment, inputs } = transitionEdge;
realign(alignment: TransitionEdgeAlignment): ContextTokenization {
const sliceIndex = alignment.edgeWindow.sliceIndex;
const baseTokenization = this.tokens.slice(sliceIndex);
let affectedToken: ContextToken;

const tokenization: ContextToken[] = [];

Expand Down Expand Up @@ -553,33 +548,71 @@ export class ContextTokenization {
tokenization.push(token);
}

return new ContextTokenization(this.tokens.slice(0, sliceIndex).concat(tokenization), null, this.taillessTrueKeystroke);
}

/**
* Given results from `precomputeTokenizationAfterInput`, this method will
* evaluate the pending transition in tokenization for all associated inputs
* while reusing as many correction-search intermediate results as possible.
* @param transitionEdge Batched results from one or more
* `precomputeTokenizationAfterInput` calls on this instance, all with the
* same alignment values.
* @param transitionId The id of the Transform associated with the keystroke
* triggering the transition.
* @param bestProbFromSet The probability of the single most likely input
* transform in the overall transformDistribution associated with the
* keystroke triggering the transition. It need not be represented by the
* tokenizationPath to be built.
* @param appliedSuggestionId When defined, the original transition ID
* corresponding to the applied suggestion; it is recorded on each affected
* token's `appliedTransitionId` field.
* @returns A new ContextTokenization reflecting the evaluated transition.
*/
evaluateTransition(
transitionEdge: TransitionEdge,
transitionId: number,
bestProbFromSet: number,
appliedSuggestionId?: number
): ContextTokenization {
const { alignment, inputs } = transitionEdge;
const sliceIndex = alignment.edgeWindow.sliceIndex;
const lexicalModel = this.tail.searchModule.model;

let affectedToken: ContextToken;

const tailTokenization = this.tokens.slice(sliceIndex);

// Assumption: inputs.length > 0. (There is at least one input transform.)
const inputTransformKeys = [...inputs[0].sample.keys()];
const baseTailIndex = (tailTokenization.length - 1);
let removedTokenCount = alignment.removedTokenCount;
while(removedTokenCount-- > 0) {
inputTransformKeys.pop();
tokenization.pop();
tailTokenization.pop();
}

let appliedLength = 0;
for(let i = 0; i < inputTransformKeys.length; i++) {
const tailRelativeIndex = inputTransformKeys[i];
let distribution = inputs.map((i) => ({sample: i.sample.get(tailRelativeIndex), p: i.p}));
const tokenIndex = (tokenization.length - 1) + tailRelativeIndex;
const tokenIndex = baseTailIndex + tailRelativeIndex;

affectedToken = tokenization[tokenIndex];
affectedToken = tailTokenization[tokenIndex];
if(!affectedToken) {
affectedToken = new ContextToken(lexicalModel);
tokenization.push(affectedToken);
tailTokenization.push(affectedToken);
} else if(KMWString.length(affectedToken.exampleInput) == distribution[0].sample.deleteLeft) {
// If the entire token will be replaced, throw out the old one and start anew.
affectedToken = new ContextToken(lexicalModel);
// Replace the token at the affected index with a brand-new token.
tokenization.splice(tokenIndex, 1, affectedToken);
tailTokenization.splice(tokenIndex, 1, affectedToken);
}

affectedToken.isPartial = true;
delete affectedToken.appliedTransitionId;
if(appliedSuggestionId !== undefined) {
affectedToken.appliedTransitionId = appliedSuggestionId;
} else {
delete affectedToken.appliedTransitionId;
}

// If we are completely replacing a token via delete left, erase the deleteLeft;
// that part applied to a _previous_ token that no longer exists.
Expand All @@ -590,7 +623,7 @@ export class ContextTokenization {

const inputSource: PathInputProperties = {
segment: {
transitionId: sourceInput.id,
transitionId,
start: appliedLength
},
bestProbFromSet: bestProbFromSet,
Expand All @@ -601,17 +634,21 @@ export class ContextTokenization {
inputSource.segment.end = appliedLength;
}

affectedToken = new ContextToken(affectedToken);
affectedToken.addInput(inputSource, distribution);

const tokenize = determineModelTokenizer(lexicalModel);
affectedToken.isWhitespace = tokenize({left: affectedToken.exampleInput, startOfBuffer: false, endOfBuffer: false}).left[0]?.isWhitespace ?? false;
// Do not re-use the previous token; the mutation may have unexpected
// results (say, in unit-testing)
tailTokenization[tokenIndex] = affectedToken;

affectedToken = null;
}

return new ContextTokenization(
this.tokens.slice(0, sliceIndex).concat(tokenization),
null /* tokenMapping */,
this.tokens.slice(0, sliceIndex).concat(tailTokenization),
transitionEdge,
determineTaillessTrueKeystroke(transitionEdge)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,6 @@ import Reversion = LexicalModelTypes.Reversion;
import Suggestion = LexicalModelTypes.Suggestion;
import Transform = LexicalModelTypes.Transform;

// Mark affected tokens with the applied-suggestion transition ID
// for easy future reference.
const tagTokens = (state: ContextState, suggestion: Suggestion) => {
const inputs = state.tokenization.transitionEdits.inputs;
const appliedTokenCount = inputs[0].sample.size;
const tokens = state.tokenization.tokens;
for(let i = tokens.length - appliedTokenCount; i < tokens.length; i++) {
tokens[i].appliedTransitionId = suggestion.transformId;
}
}

/**
* Represents the transition between two context states as triggered
* by input keystrokes or applied suggestions.
Expand Down Expand Up @@ -145,15 +134,15 @@ export class ContextTransition {
const buildAppliedTransition = (
transition: ContextTransition,
baseState: ContextState,
transform: Transform
transform: Transform,
appliedTransitionId: number
) => {
const state = baseState.analyzeTransition(
baseState.context,
[{sample: transform, p: 1}],
true
appliedTransitionId
).final;

tagTokens(state, suggestion);
transition._final = state;

// Applying a suggestion should not forget the original suggestion set.
Expand All @@ -166,7 +155,7 @@ export class ContextTransition {
// keystroke data.

const resultTransition = new ContextTransition(this);
buildAppliedTransition(resultTransition, this.base, suggestion.transform);
buildAppliedTransition(resultTransition, this.base, suggestion.transform, suggestion.transformId);

// An applied suggestion should replace the original Transition's effects, though keeping
// the original input around.
Expand All @@ -178,7 +167,7 @@ export class ContextTransition {
}

const finalTransition = new ContextTransition(resultTransition.final, suggestion.appendedTransform.id);
buildAppliedTransition(finalTransition, resultTransition.final, suggestion.appendedTransform);
buildAppliedTransition(finalTransition, resultTransition.final, suggestion.appendedTransform, suggestion.transformId);

// The appended transform is applied with no intermediate input.
finalTransition.final.appliedInput = { insert: '', deleteLeft: 0 };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,14 +346,13 @@ export function determineSuggestionAlignment(
const context = transition.base.context;
const postContext = transition.final.context;
const inputTransform = transition.inputDistribution[0].sample;
const inputTransformMap = transitionEdits?.inputs[0].sample;
let deleteLeft: number;

// If the context now has more tokens, the token we'll be 'predicting' didn't originally exist.
const wordbreak = determineModelWordbreaker(lexicalModel);

// Is the token under construction newly-constructed / is there no pre-existing root?
if(tokenization.taillessTrueKeystroke && inputTransformMap?.has(1)) {
if(tokenization.taillessTrueKeystroke && transitionEdits?.addedNewTokens) {
return {
// If the new token is due to whitespace or due to a different input type
// that would likely imply a tokenization boundary, infer 'new word' mode.
Expand All @@ -366,7 +365,7 @@ export function determineSuggestionAlignment(
deleteLeft: 0
};
// If the tokenized context length is shorter... sounds like a backspace (or similar).
} else if (transitionEdits?.alignment.removedTokenCount > 0) {
} else if (transitionEdits?.removedOldTokens) {
/* Ooh, we've dropped context here. Almost certainly from a backspace or
* similar effect. Even if we drop multiple tokens... well, we know exactly
* how many chars were actually deleted - `inputTransform.deleteLeft`. Since
Expand Down
Loading