diff --git a/README.md b/README.md index ce0ea4fa0..48272195f 100644 --- a/README.md +++ b/README.md @@ -82,7 +82,7 @@ pnpm i # if you're using pnpm, or npm install if you're using npm ## build the codex-webviews codex-editor % cd webviews/codex-webviews codex-webviews % pnpm i -codex-webviews % pnpm run build +codex-webviews % pnpm run build:all ## build the dictionary-side-panel codex-webviews % cd ../dictionary-side-panel diff --git a/package.json b/package.json index 4609e87a4..b4fc5a469 100644 --- a/package.json +++ b/package.json @@ -193,6 +193,16 @@ "shortTitle": "Start a new Project", "command": "codex-editor-extension.initializeNewProject" }, + { + "title": "Import USFM", + "shortTitle": "Import USFM", + "command": "codex-editor-extension.importUsfm" + }, + { + "title": "Export USFM", + "shortTitle": "Export USFM", + "command": "codex-editor-extension.exportUsfm" + }, { "command": "codex-editor-extension.downloadSourceTextBibles", "title": "Download Source Text Bible" @@ -424,6 +434,7 @@ }, "scripts": { "vscode:prepublish": "npm run compile", + "build": "webpack", "compile": "webpack", "watch": "webpack --watch", "package": "webpack --mode production --devtool hidden-source-map", @@ -482,13 +493,19 @@ "moment": "^2.30.1", "path": "^0.12.7", "pnpm": "^8.15.5", + "proskomma-core": "^0.10.4", + "proskomma-json-tools": "^0.8.7", "react-wordcloud": "^1.2.7", "semver": "^7.6.0", "sinon": "^17.0.1", + "string-punctuation-tokenizer": "^2.2.0", "uuid": "^9.0.1", "vscode-languageclient": "^9.0.1", "vscode-languageserver": "^9.0.1", - "vscode-languageserver-textdocument": "^1.0.11" + "vscode-languageserver-textdocument": "^1.0.11", + "wordmap": "^0.6.0", + "wordmap-lexer": "^0.3.5", + "wordmapbooster": "^1.0.0" }, "overrides": { "minimatch": "5.1.2", diff --git a/src/extension.ts b/src/extension.ts index 0c5685531..c3b035271 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -26,6 +26,7 @@ import { initializeWebviews } from 
"./activationHelpers/contextAware/webviewInit import { syncUtils } from "./activationHelpers/contextAware/syncUtils"; import { initializeStateStore } from "./stateStore"; import { projectFileExists } from "./utils/fileUtils"; +import { registerUsfmImporter } from "./usfmStuff/importUsfm"; // The following block ensures a smooth user experience by guiding the user through the initial setup process before the extension is fully activated. This is crucial for setting up the necessary project environment and avoiding any functionality issues that might arise from missing project configurations. @@ -48,6 +49,7 @@ export async function activate(context: vscode.ExtensionContext) { registerReferencesCodeLens(context); registerSourceCodeLens(context); registerTextSelectionHandler(context, () => undefined); + registerUsfmImporter(context); const [, syncStatus] = registerScmStatusBar(context); syncUtils.registerSyncCommands(context, syncStatus); @@ -73,7 +75,7 @@ export function deactivate(): Thenable { } async function executeCommandsAfter() { - // wasn't sure if these had to be executed seperately but it's here to be on the safeside, otherwise later it should go in commands.ts + // wasn't sure if these had to be executed separately but it's here to be on the safe side, otherwise later it should go in commands.ts vscode.commands.executeCommand("workbench.action.focusAuxiliaryBar"); vscode.commands.executeCommand( diff --git a/src/providers/dictionaryTable/DictionaryTablePanel.ts b/src/providers/dictionaryTable/DictionaryTablePanel.ts index 32956cbe6..7fa413373 100644 --- a/src/providers/dictionaryTable/DictionaryTablePanel.ts +++ b/src/providers/dictionaryTable/DictionaryTablePanel.ts @@ -39,6 +39,7 @@ export class DictionaryTablePanel { id: "", label: "", entries: [{ + headForm: "", id: "", headWord: "", variantForms: [], diff --git a/src/providers/obs/data/TextTemplate.json b/src/providers/obs/data/TextTemplate.json index d6bfd3e4c..6dc8eba41 100644 --- 
/**
 * A single character together with the coordinates it was read from.
 * `isMeta` takes part in equality when diffing: two characters only match
 * when both `char` and `isMeta` agree.
 */
export interface TAttributedChar {
    char: string;
    blockIndex: number;
    contentIndex: number;
    charIndex: number;
    isMeta: boolean;
    supplemented?: boolean;
}

/** A string represented as an array of attributed characters. */
export interface TAttributedString extends Array<TAttributedChar> {}

/**
 * How one step of the diff path was produced.
 * - STATE_MATCH: the character is present in both inputs.
 * - STATE_PASSING_1ST: the character comes from the first input only.
 * - STATE_PASSING_2ND: the character comes from the second input only.
 */
export enum DiffState {
    STATE_MATCH,
    STATE_PASSING_1ST,
    STATE_PASSING_2ND
}

/** One cell of the diff table, linked back to the cell it was derived from. */
class LineCompIndex {
    errorCount: number;
    previous: LineCompIndex | null;
    state: DiffState;
    content: TAttributedChar;

    constructor() {
        this.errorCount = 0;
        this.previous = null;
        this.state = DiffState.STATE_PASSING_1ST;
        this.content = { char: "", blockIndex: 0, contentIndex: 0, charIndex: 0, isMeta: false };
    }
}

/**
 * Computes a character level diff between two attributed strings.
 *
 * The table is filled row by row (one row per character of `content1`, one
 * column per character of `content2`), allocating one node per cell. A mismatch
 * is never a substitution: it costs one error and is recorded either as
 * passing over the first input or passing over the second. On a tie the
 * second input is preferred.
 *
 * @param content1 The first string; its unmatched characters are reported as STATE_PASSING_1ST.
 * @param content2 The second string; its unmatched characters are reported as STATE_PASSING_2ND.
 * @returns The chosen path in forward order. The first element is always a
 *          root node with STATE_MATCH and an empty `content.char`, so the
 *          result has at least one element even for two empty inputs.
 */
export function traceDiffs( content1: TAttributedString, content2: TAttributedString ): LineCompIndex[] {
    //The root cell: nothing consumed from either side yet.
    const root = new LineCompIndex();
    root.state = DiffState.STATE_MATCH;

    //Top row: the only way to get anywhere is to pass over content2.
    let thisLine: LineCompIndex[] = [ root ];
    for( const char2 of content2 ){
        const node = new LineCompIndex();
        node.previous = thisLine[ thisLine.length-1 ];
        node.errorCount = node.previous.errorCount+1;
        node.content = char2;
        node.state = DiffState.STATE_PASSING_2ND;
        thisLine.push( node );
    }

    for( const char1 of content1 ){
        const lastLine = thisLine;
        thisLine = [];

        //Left column: the only way to get here is to pass over content1.
        const leftEdge = new LineCompIndex();
        leftEdge.previous = lastLine[ 0 ];
        leftEdge.errorCount = leftEdge.previous.errorCount+1;
        leftEdge.content = char1;
        leftEdge.state = DiffState.STATE_PASSING_1ST;
        thisLine.push( leftEdge );

        for( let columnIndex = 1; columnIndex <= content2.length; columnIndex++ ){
            const char2 = content2[ columnIndex-1 ];
            const node = new LineCompIndex();

            if( char2.char === char1.char && char2.isMeta === char1.isMeta ){
                //Diagonal step, no additional error.
                node.previous = lastLine[ columnIndex-1 ];
                node.errorCount = node.previous.errorCount;
                node.state = DiffState.STATE_MATCH;
                node.content = char1;
            }else{
                //Take the cheaper neighbour. The comparison is strict so a tie
                //goes to passing over content2.
                if( lastLine[ columnIndex ].errorCount < thisLine[ columnIndex-1 ].errorCount ){
                    node.previous = lastLine[ columnIndex ];
                    node.content = char1;
                    node.state = DiffState.STATE_PASSING_1ST;
                }else{
                    node.previous = thisLine[ columnIndex-1 ];
                    node.content = char2;
                    node.state = DiffState.STATE_PASSING_2ND;
                }
                node.errorCount = node.previous.errorCount+1;
            }

            thisLine.push( node );
        }
    }

    //Walk the back links from the final cell to the root, then flip the list.
    const backwardsList: LineCompIndex[] = [];
    let currentNode: LineCompIndex | null = thisLine[ thisLine.length-1 ];
    while( currentNode != null ){
        backwardsList.push( currentNode );
        currentNode = currentNode.previous;
    }

    return backwardsList.reverse();
}
/**
 * Asks the user which USFM file(s) should be imported.
 *
 * Shows a multi-select open dialog restricted to files (no folders) with the
 * extensions usfm/USFM/usf/USF.
 *
 * @returns The URIs chosen in the dialog.
 * @throws Error with the message 'User canceled import.' when the dialog
 *         yields no selection.
 */
async function getImportParameters(): Promise<UsfmImportParameters> {
    //https://vshaxe.github.io/vscode-extern/vscode/OpenDialogOptions.html
    const usfmFiles = await vscode.window.showOpenDialog({
        canSelectFolders: false,
        canSelectFiles: true,
        canSelectMany: true,
        openLabel: "Choose USFM file(s) to import",
        filters: {
            'USFM': ['usfm','USFM','usf','USF']
        }
    });

    //Throw an exception if the user canceled.
    if (!usfmFiles) throw new Error('User canceled import.');

    return { usfmFiles };
}
/**
 * Collects the verse text held in the code cells of a notebook.
 *
 * Every line of every code cell is searched for the first
 * "<chapter>:<verse>" reference (digits, a colon, digits). Whatever follows
 * that reference on the same line, minus one optional separating space, is
 * stored as the verse text under the key "<chapter>:<verse>". Lines without a
 * reference are ignored, and a later line with the same reference overwrites
 * an earlier one. Markup cells are skipped.
 *
 * @param notebook The notebook to read.
 * @returns A map from "chapter:verse" to the verse text.
 */
async function collectScriptureDataFromNotebook( notebook: vscode.NotebookDocument ) : Promise<{ [ref: string]: string; }> {

    //regular expression which will match a number a colon and a number.
    const referenceFinder = /(?<chapter>\d+):(?<verse>\d+)/;

    const result: {[ref: string]: string} = {};

    for (let i = 0; i < notebook.cellCount; i++) {
        const cell = notebook.cellAt(i);

        //only consider code cells. The headers and the notes
        //are markdown.
        if (cell.kind !== vscode.NotebookCellKind.Code) continue;

        //iterate line by line. Accept both LF and CRLF so that a carriage
        //return never ends up at the end of the verse text.
        for (const line of cell.document.getText().split(/\r?\n/)) {
            const match = referenceFinder.exec(line);
            if (!match?.groups) continue;

            const ref = `${match.groups.chapter}:${match.groups.verse}`;

            //The verse is everything after the reference, skipping a single
            //space if one directly follows it.
            let verseStart = match.index + match[0].length;
            if (line[verseStart] === " ") {
                verseStart++;
            }
            result[ref] = line.substring(verseStart);
        }
    }

    return result;
}
/**
 * Stores `perf` in the metadata of one markup cell of the notebook.
 *
 * The host cell is chosen in two passes: first the earliest markup cell that
 * already has a `perf` entry in its metadata; failing that, the earliest
 * markup cell whose text starts with "# Chapter". The cell's other metadata
 * is kept. When neither pass finds a cell nothing is written.
 *
 * @param notebook The notebook whose cell metadata is updated.
 * @param perf The perf document to stash on the cell.
 */
async function updatePerfOnNotebook(notebook: vscode.NotebookDocument, perf: Perf) {
    //Returns the index of the first markup cell accepted by the test, or -1.
    const findMarkupCellIndex = (accept: (cell: vscode.NotebookCell) => boolean): number => {
        for (let index = 0; index < notebook.cellCount; index++) {
            const candidate = notebook.cellAt(index);
            if (candidate.kind === vscode.NotebookCellKind.Markup && accept(candidate)) {
                return index;
            }
        }
        return -1;
    };

    //Prefer the cell which is already carrying a perf.
    let hostIndex = findMarkupCellIndex((cell) => Boolean(cell.metadata?.perf));

    //Otherwise fall back to the first chapter heading, which should be Chapter 1.
    if (hostIndex < 0) {
        hostIndex = findMarkupCellIndex((cell) => cell.document.getText().startsWith("# Chapter"));
    }

    //Nowhere to put it.
    if (hostIndex < 0) {
        return;
    }

    const hostCell = notebook.cellAt(hostIndex);
    const metadataEdit = vscode.NotebookEdit.updateCellMetadata(hostIndex, {
        ...hostCell.metadata,
        perf: perf
    });

    // Apply the edit to the notebook
    const workspaceEdit = new vscode.WorkspaceEdit();
    workspaceEdit.set(notebook.uri, [metadataEdit]);
    await vscode.workspace.applyEdit(workspaceEdit);
}
/** One step to apply to a perf, located by a block/content index. */
interface PerfEditAction{
    type: "createChapter" | "insertVerse" | "edit"; //No delete for now, we will just edit to a blank verse.
    chapterNumber: number;
    verseNumber?: number;
    newVerseText?: string;
    index: TBlockContentIndex;
}

/**
 * Turns notebook verse content plus an index of an existing perf into an
 * ordered list of actions.
 *
 * - A reference present in `perf_index.verses` becomes an "edit".
 * - A reference that is absent becomes an "insertVerse", preceded (in the
 *   resulting perf) by a "createChapter" the first time its chapter is missing.
 * - When `removeMissing` is set, every indexed verse with no entry at all in
 *   `notebook_content` becomes an "edit" to the empty string. A verse that is
 *   present but blank is not treated as missing.
 *
 * The returned list is sorted from the end of the perf towards the beginning
 * (descending block, then content index). For equal locations the tie-breaks
 * are descending chapter, then edit / insertVerse / createChapter, then
 * descending verse number, because repeated insertion at one index yields
 * the reverse of the action order.
 *
 * `perf_index` and `perf` are only read; insertion points are copied before
 * they are adjusted.
 *
 * @param notebook_content Map from "chapter:verse" to the new verse text.
 * @param perf_index Locations of the chapters and verses already in the perf, plus the last location.
 * @param removeMissing Whether indexed verses absent from `notebook_content` are blanked.
 * @param perf The perf the index describes; used to look up block lengths.
 * @returns The actions in the order they should be executed.
 */
function combineIndexWithContentIntoActions(
    notebook_content: { [ref: string]: string; },
    perf_index: PerfReferenceSet,
    removeMissing: boolean,
    perf: Perf ) : PerfEditAction[] {

    //set of chapters which already have actions to create them.
    const created_chapters : { [chapter: number]: PerfEditAction } = {};
    const actions : PerfEditAction[] = [];

    //Detach a location from the index so that adjusting it can't change the index.
    const copyIndex = ( index: TBlockContentIndex ) : TBlockContentIndex => ({ b: index.b, c: index.c });

    //Existing chapter numbers greater than the given one, ascending.
    const chaptersAfter = ( chapterNumber: number ) : number[] =>
        Object.keys( perf_index.chapters ).map( x => parseInt(x, 10) ).filter( x => x > chapterNumber ).sort( (a,b) => a-b );

    //Iterate through notebook_content:
    for( const [ref, verseText] of Object.entries( notebook_content ) ){

        const [chapterNumber, verseNumber ] : number[] = ref.split(":").map( x => parseInt(x, 10) );

        if( perf_index.verses[ref] ){
            //in this case the verse already exists in the perf so we will just need to edit it.
            actions.push( {
                type: "edit" as const,
                chapterNumber,
                verseNumber,
                newVerseText: verseText,
                index: perf_index.verses[ref]
            } );
            continue;
        }

        //No match, so create an insert action.
        let insertionLocation : TBlockContentIndex;
        let createChapter = false;
        let alreadyAdjusted = false;

        if( !perf_index.chapters[chapterNumber] ){
            const pendingChapter = created_chapters[chapterNumber];
            if( !pendingChapter ){
                //we have to create a chapter. It goes at the first block of the
                //next chapter if it exists or at the end.
                const greaterChapters = chaptersAfter( chapterNumber );
                insertionLocation = greaterChapters.length
                    ? copyIndex( perf_index.chapters[greaterChapters[0]] )
                    : {b: perf_index.last.b, c: perf_index.last.c + 1};
                createChapter = true;
            }else{
                //The chapter is already going to be created, so insert at the same location.
                //The sort later on makes the verse insertions happen before the chapter
                //insertion, which leaves the chapter in front of its verses.
                //That location was adjusted when the chapter action was made, so it
                //must not be adjusted a second time.
                insertionLocation = pendingChapter.index;
                alreadyAdjusted = true;
            }
        }else{
            //the chapter exists but the verse doesn't.
            //Insert just before the next verse of the chapter if there is one,
            //otherwise just before the next chapter, otherwise at the end.
            const greaterVersesInChapter = Object.keys( perf_index.verses )
                .filter( otherRef => otherRef.startsWith( `${chapterNumber}:` ) )
                .map( x => parseInt(x.split(":")[1], 10) )
                .filter( x => x > verseNumber )
                .sort( (a,b) => a-b );
            if( greaterVersesInChapter.length ){
                insertionLocation = copyIndex( perf_index.verses[`${chapterNumber}:${greaterVersesInChapter[0]}`] );
            }else{
                const greaterChapters = chaptersAfter( chapterNumber );
                insertionLocation = greaterChapters.length
                    ? copyIndex( perf_index.chapters[greaterChapters[0]] )
                    : copyIndex( perf_index.last );
            }
        }

        //we should never insert before a chapter mark because anything that is in the same block as a chapter gets added to that chapter.
        //So an insertion point at the very start of a block (other than the first block) is moved to the end of the previous block.
        //This could fail if something else ends up being at the front of the chapter which we don't know about but makes the chapter mark
        //we are inserting in front of not be at c==0.
        if( !alreadyAdjusted && insertionLocation.c === 0 && insertionLocation.b > 0 ){ //bookmark1
            insertionLocation.b--;
            insertionLocation.c = perf?.sequences?.[perf?.main_sequence_id ?? ""]?.blocks?.[insertionLocation.b]?.content?.length ?? 0;
        }

        actions.push( {
            type: "insertVerse" as const,
            chapterNumber,
            verseNumber,
            newVerseText: verseText,
            index: insertionLocation,
        } );

        if( createChapter ){
            //create a chapter at the same location as its first verse.
            const chapterAction = {
                type: "createChapter" as const,
                chapterNumber,
                index: insertionLocation };

            actions.push( chapterAction );
            created_chapters[chapterNumber] = chapterAction;
        }
    }

    //now we need to create drop edit actions for all the things which didn't receive a match. (Right now we will just make them edits to a blank verse)
    if( removeMissing ){
        for( const [ref, index] of Object.entries( perf_index.verses ) ){
            //A verse which is present but blank was already handled above.
            if( Object.prototype.hasOwnProperty.call( notebook_content, ref ) ) continue;

            const [chapterNumber, verseNumber ] : number[] = ref.split(":").map( x => parseInt(x, 10) );
            actions.push( {
                type: "edit" as const,
                chapterNumber,
                verseNumber,
                newVerseText: "",
                index
            } );
        }
    }

    //The order we want things to end up in within the PERF. The action order is the reverse of it.
    //The edit comes last here because it must be the first action: its index is
    //what is there now and not what is there after something takes its place.
    const operationOrder = ["createChapter", "insertVerse", "edit",];

    //now we need to sort the actions so that they can be executed in order.
    //I have b-a because I want this in descending order so that the list is processed from the end to the beginning.
    actions.sort( (a,b) => {
        //if the location of operation is different, then obviously that order needs to be respected.
        if( a.index.b != b.index.b ) return b.index.b - a.index.b;
        if( a.index.c != b.index.c ) return b.index.c - a.index.c;

        //Interleave createChapter and insertVerse by chapter so that the verses end up in the
        //correct chapter even though they all have the same insertion point.
        if( a.chapterNumber != b.chapterNumber ){
            return b.chapterNumber - a.chapterNumber;
        }

        //Within a chapter the createChapter must end up before the insertVerse in the PERF,
        //so in action order it comes after.
        const aTypeOrder = operationOrder.indexOf(a.type);
        const bTypeOrder = operationOrder.indexOf(b.type);
        if( aTypeOrder != bTypeOrder ) return bTypeOrder - aTypeOrder;

        //finally sort by verse number so multiple verse insertions end up in the right order.
        if( a.verseNumber !== undefined && b.verseNumber !== undefined ){
            if( a.verseNumber != b.verseNumber ) return b.verseNumber - a.verseNumber;
        }

        //If we get here the moon must be imploding... or they added two verses with the same reference.
        return 0;
    });

    return actions;
}
""]?.blocks?.[insertionIndex.b]?.content?.splice( insertionIndex.c, 0, ...newSection ); +} + +function editVerse( perf: Perf, chapterNumber: number, verseNumber: number, newVerseText: string, insertionIndex: TBlockContentIndex ){ + //first see if the verse actually needs to be edited. + const testExport = importHacks( getAttributedVerseCharactersFromPerf( perf, {chapter:chapterNumber, verse:verseNumber}, false, insertionIndex) as string); + + //A string comparison is cheaper then a diff, so + //we do this first off so that we don't do diffs on all the + //unmodified content. + if( testExport === newVerseText ) return; + + //grab the alignment from the perf so we can fix the alignments back up after the edit. + const savedAlignments = extractAlignmentsFromPerfVerse( pullVerseFromPerf( `${chapterNumber}:${verseNumber}`, perf, insertionIndex ) ?? [] ); + + + //Now get the target as an attributed string where we can find where each character came from. + const attributedTarget = getAttributedVerseCharactersFromPerf( perf, {chapter:chapterNumber, verse:verseNumber}, true, insertionIndex) as TAttributedString; + + + //Do the same for the incoming new content except that we have to wrap it in a fake perf to make the attributes have context. + const newPerfVerse = stringToPerfVerse( newVerseText ); + + + //These functions are defined here because they are just specific to this function, if that is bad form they should be able to be moved + //outward. + function getPerfChar( _perf: Perf, _blockIndex: number, _contentIndex: number, _charIndex: number ){ + const content = _perf?.sequences?.[_perf?.main_sequence_id ?? 
""]?.blocks?.[_blockIndex]?.content?.[_contentIndex]; + if( typeof( content ) == "string" ){ + return content[_charIndex]; + }else if( typeof( content ) == "object" ){ + return content.content?.join("")[_charIndex]; + } + return ""; + } + function dropPerfChar( _perf: Perf, _blockIndex: number, _contentIndex: number, _charIndex: number ){ + let dropped_char : string | undefined = undefined; + const contentArray = _perf?.sequences?.[_perf?.main_sequence_id ?? ""]?.blocks?.[_blockIndex]?.content; + if( contentArray != undefined ){ + const content = contentArray[_contentIndex]; + if( typeof( content ) == "string" ){ + dropped_char = content[_charIndex]; + contentArray[_contentIndex] = content.substring(0, _charIndex) + content.substring(_charIndex + 1); + }else if( typeof( content ) == "object" ){ + let usedCharIndex = _charIndex; + for(let i = 0; i < content.content!.length; ++i ){ + const oneContent = content.content![i]; + if( usedCharIndex < oneContent.length ){ + dropped_char = oneContent[usedCharIndex]; + content.content![i] = oneContent.substring(0, usedCharIndex) + oneContent.substring(usedCharIndex + 1); + break; + } + usedCharIndex -= oneContent.length; + } + } + } + return dropped_char; + } + function addPerfPiece( _perf: Perf, _blockIndex: number, _contentIndex: number, _content: string, _makeWord: boolean ){ + const contentArray = _perf?.sequences?.[_perf?.main_sequence_id ?? ""]?.blocks?.[_blockIndex]?.content; + if( contentArray !== undefined ){ + if( _makeWord ){ + contentArray.splice( _contentIndex, 0, { + type: "wrapper", + subtype: 'usfm:w', + content: [ _content ], + }); + }else{ + contentArray.splice( _contentIndex, 0, _content ); + } + } + } + function insertIntoPerfPiece( _perf: Perf, _blockIndex: number, _contentIndex: number, _charIndex: number, _char: string ){ + const contentArray = _perf?.sequences?.[_perf?.main_sequence_id ?? 
""]?.blocks?.[_blockIndex]?.content; + if( contentArray !== undefined ){ + if( _charIndex < 0 ){ + //if our _charIndex is -1 for this insertion, then that means, insert before this word boundary. + //If _charIndex is a word, then we need to insert a string at the current index. + //If the current index is already a string, then this has already happened. + if( typeof(contentArray[_contentIndex]) == "string" ){ + contentArray[_contentIndex] = _char + contentArray[_contentIndex]; + }else{ + contentArray.splice( _contentIndex, 0, _char ); + } + }else{ + const content : string | PerfContent | undefined = contentArray?.[_contentIndex]; + if( content === undefined ){ + throw new Error("Internal error. Attempting to insert a character into a perf that does not exist."); + }else if( typeof( content ) == "string" ){ + contentArray![_contentIndex] = content.slice(0, _charIndex) + _char + content.slice(_charIndex); + }else if( typeof( content ) == "object" ){ + + const contentLength = content.content!.join("").length; + if( _charIndex > contentLength ){ + //If _charIndex is > then the length of this word, then it means to add it outside of the word boundary. + //So if the next index is a string then add the content as a prefix to that string, otherwise + //we need to insert a string at the following index. 
+ if( _contentIndex+1 < contentArray.length && typeof(contentArray[_contentIndex+1]) == "string" ){ + contentArray[_contentIndex+1] = _char + contentArray[_contentIndex+1]; + }else{ + contentArray.splice( _contentIndex+1, 0, _char ); + } + + }else{ + + let usedCharIndex = _charIndex; + for( let i = 0; i < content.content!.length; ++i ){ + const oneContent = content.content![i]; + if( usedCharIndex <= oneContent.length ){ + content.content![i] = oneContent.slice(0, usedCharIndex) + _char + oneContent.slice(usedCharIndex); + break; + } + usedCharIndex -= oneContent.length; + } + } + } + } + } + } + function getPerfPiece( _perf: Perf, _blockIndex: number, _contentIndex: number ){ + const content = _perf?.sequences?.[_perf?.main_sequence_id ?? ""]?.blocks?.[_blockIndex]?.content?.[_contentIndex]; + if( typeof( content ) == "string" ){ + return content; + }else if( typeof( content ) == "object" ){ + return content.content?.join(""); + } + return ""; + } + function dropPerfPiece( _perf: Perf, _blockIndex: number, _contentIndex: number ){ + const contentArray = _perf?.sequences?.[_perf?.main_sequence_id ?? ""]?.blocks?.[_blockIndex]?.content; + const droppedPiece = contentArray?.splice( _contentIndex, 1 ); + + if( droppedPiece === undefined || droppedPiece.length == 0 ){ + throw new Error("Internal error. Attempting to drop a perf that does not exist."); + } + + if( typeof( droppedPiece[0]) != "string" && + (typeof(droppedPiece[0]) != "object" || (droppedPiece[0] as PerfContent)?.type != "wrapper" || (droppedPiece[0] as PerfContent)?.subtype != "usfm:w") ){ + throw new Error("Internal error. Attempting to drop a perf piece that is not a word or a string."); + } + } + + function splitPerfPiece( _perf: Perf, _blockIndex: number, _contentIndex: number, _charIndex: number, _makeWord: boolean ){ + const contentArray = _perf?.sequences?.[_perf?.main_sequence_id ?? 
""]?.blocks?.[_blockIndex]?.content; + if( contentArray != undefined ){ + const content : string | PerfContent | undefined = contentArray?.[_contentIndex]; + + + let existingContentString : string = ""; + + //pull the content. + if( content === undefined ){ + throw new Error("Internal error. Attempting to split a perf that does not exist."); + }else if( typeof( content ) == "string" ){ + existingContentString = content; + }else if( typeof( content ) == "object" && content.type == "wrapper" && content.subtype == "usfm:w" ){ + existingContentString = content.content!.join(""); + }else{ + throw new Error("Internal error. Attempting to split a perf piece that is not a word or a string."); + } + + //if the _charIndex is -1, then this is when we are dealing with content before the + //first word. + //if we are creating a word, then upgrade the current string to a word and leave it. + //Any more characters added at -1 will end up before it as a new string. + //if we are not making a word, then we need to insert a zero length string + //before this string so that new chars get added to that. If that is a word + //it will get upgraded later. + if( _charIndex == -1 ){ + if( typeof( content ) == "string" ){ + if( _makeWord ){ + const newWord = { + type: "wrapper", + subtype: 'usfm:w', + content: [ existingContentString ], + }; + //replace with content upgraded to word. + contentArray[_contentIndex] = newWord; + }else{ + //otherwise insert a zero length string. + contentArray?.splice(_contentIndex, 0, ""); + } + } + + + + //if the split point is the length of the current content, + //then we don't need to do anything. This is because the + //first word or intraword we create will not have a word after + //it that needs to be created. + }else if( _charIndex < existingContentString.length ){ + + //split it. + const part1 = existingContentString.slice(0, _charIndex); + const part2 = existingContentString.slice(_charIndex); + + + //put part1 back in. 
+ if( typeof( content ) == "string" ){ + contentArray![_contentIndex] = part1; + }else{ + content!.content = [ part1 ]; + } + + //now insert part2. + if( _makeWord ){ + const newWord = { + type: "wrapper", + subtype: 'usfm:w', + content: [ part2 ], + }; + contentArray?.splice(_contentIndex+1, 0, newWord); + }else{ + contentArray?.splice(_contentIndex+1, 0, part2); + } + } + } + + } + + //if the existing perf verse is empty, then just concat it in. + if( attributedTarget.length == 0 ){ + const splicePoint = {b:insertionIndex.b,c:insertionIndex.c+1}; //+1 to get after the verse marker. + perf?.sequences?.[perf?.main_sequence_id ?? ""]?.blocks?.[splicePoint.b]?.content?.splice( splicePoint.c, 0, ...newPerfVerse ); + }else{ + + const newFakePerf = { + main_sequence_id: "new_verse", + sequences: {"new_verse": { + blocks: [{ + content:newPerfVerse, + type: 'paragraph', + }] + }} + }; + const attributedSource = getAttributedVerseCharactersFromPerf( newFakePerf, {chapter:chapterNumber, verse:verseNumber}, true, {b:0, c:0}) as TAttributedString; + + //now run a diff between the two of them. + const editDiffs = traceDiffs( attributedTarget, attributedSource ); + + //need to go through the diff and make the insertions be referencing the locations in the target perf and not the source perf. + //we do this by running through it in the reverse direction and keeping the last seen index in the target perf and setting that as the insertion + //location for all the characters which are inserts. + const lastAttributedTarget = attributedTarget[attributedTarget.length-1]; + let insertBlockIndex = lastAttributedTarget.blockIndex; + let insertContentIndex = lastAttributedTarget.contentIndex; + let insertCharacterIndex = lastAttributedTarget.charIndex + 1; //add one so that it would insert after the last char if we have inserts before getting there. 
+ for( let i = editDiffs.length-1; i >= 0; i-- ){ + const editDiff = editDiffs[i]; + if( editDiff.state == DiffState.STATE_PASSING_2ND ){ + //This is an insert which means it is referencing the fake perf, and we need to copy the index stuff into it. + editDiff.content.blockIndex = insertBlockIndex; + editDiff.content.contentIndex = insertContentIndex; + editDiff.content.charIndex = insertCharacterIndex; + }else{ + insertBlockIndex = editDiff.content.blockIndex; + insertContentIndex = editDiff.content.contentIndex; + insertCharacterIndex = editDiff.content.charIndex; + } + } + + //TODO: Need to go through the diff and remove word boundary modifications in sections that were not edited. + //This perhaps should be added once there is a way to manually change the word boundaries so that there is boundary information that should be preserved. + //when it is done the word boundary modification removals need to be done in corresponding pairs. + + + + //now apply the edits to the perf + //iterate through the edit diffs in reverse so that the indexes are still correct when we get to them. + for( let i = editDiffs.length-1; i >= 0; i-- ){ + const editDiff = editDiffs[i]; + const targetChar = editDiff.content; + if( !targetChar.isMeta ){ + if( editDiff.state == DiffState.STATE_PASSING_1ST ){ + //passing first means deleting the target character. So it needs to be spliced out. + //see if supplemented is in the content.. If it is supplemented, you can't remove it. + if( !editDiff.content.supplemented ){ + const droppedChar = dropPerfChar( perf, targetChar.blockIndex, targetChar.contentIndex, targetChar.charIndex ); + if( droppedChar != targetChar.char ){ + throw new Error("Internal error. Attempting to remove a character that is not in the perf."); + } + } + }else if( editDiff.state == DiffState.STATE_PASSING_2ND ){ + //passing second means adding the target character. So it needs to be added. 
+ insertIntoPerfPiece( perf, targetChar.blockIndex, targetChar.contentIndex, targetChar.charIndex, targetChar.char ); + }else if( editDiff.state == DiffState.STATE_MATCH ){ + //just double check that this char is correct. + //You can't verify a match with supplemented characters. + if( !editDiff.content.supplemented ){ + const currentChar = getPerfChar( perf, targetChar.blockIndex, targetChar.contentIndex, targetChar.charIndex ); + if( currentChar != targetChar.char ){ + throw new Error("Internal error. Character match is wrong."); + } + } + } + }else{ //if is meta (word boundary changes) + //meta edits are the addition and removal of word boundaries and are a bit more interesting. + if( editDiff.state == DiffState.STATE_PASSING_1ST ){ + + if( editDiff.content.char == "<" ){ + //This means that we are removing the current start of word boundary. So the content of the current word or string + //needs to be added to the end of the string or word that comes at a lower perf index. + if( editDiff.content.charIndex !== -1 ){ + throw new Error( "Internal error. Trying to remove a word boundary that is not at the start of a word." ); + } + if( i > 0 ){ + //Can only combine in with the previous content if it actually exists. + if( !editDiffs[i-1].content.supplemented ){ + const pieceToInsert = getPerfPiece( perf, editDiff.content.blockIndex, editDiff.content.contentIndex ); + //first insert it into the previous area + if( pieceToInsert ){ + insertIntoPerfPiece( perf, editDiffs[i-1].content.blockIndex, editDiffs[i-1].content.contentIndex, editDiffs[i-1].content.charIndex+1, pieceToInsert ); + } + //and then remove as its own entity. + dropPerfPiece( perf, editDiff.content.blockIndex, editDiff.content.contentIndex ); + }else{ + //otherwise we need to just demote ourselves to a string and the boundary before us is ok. 
+ const demotedContent = getPerfPiece( perf, editDiff.content.blockIndex, editDiff.content.contentIndex )!; + dropPerfPiece( perf, editDiff.content.blockIndex, editDiff.content.contentIndex ); + //don't insert if an empty string or undefined. + if( demotedContent ){ + addPerfPiece( perf, editDiff.content.blockIndex, editDiff.content.contentIndex, demotedContent, false ); + } + } + } + }else if( editDiff.content.char == ">" ){ + //This means the removal of the end of a word boundary. But the indexing for this is at the tail end of the word that + //the boundary is being removed on. + //So we take all of the next perf index and add it to the current item. This item type is defined by the start boundary + //of it, which we will leave alone unless the diff gets there and changes it. + if( editDiff.content.charIndex != getPerfPiece( perf, editDiff.content.blockIndex, editDiff.content.contentIndex )?.length ){ + throw new Error( "Internal error. Trying to remove a word boundary that is not at the end of a word." ); + } + if( i < editDiffs.length-1 ){ + //Even if editDiffs[i+1] was a deletion, there should still be an empty string there still. + //but we need to make sure it isn't supplemented content because then it doesn't actually exist. + if( !editDiffs[i+1].content.supplemented ){ + const pieceToInsert = getPerfPiece( perf, editDiffs[i+1].content.blockIndex, editDiffs[i+1].content.contentIndex ); + if( pieceToInsert ){ + //first insert it into the next area + insertIntoPerfPiece( perf, editDiff.content.blockIndex, editDiff.content.contentIndex, editDiff.content.charIndex, pieceToInsert ); + } + //and then remove as its own entity. + dropPerfPiece( perf, editDiffs[i+1].content.blockIndex, editDiffs[i+1].content.contentIndex ); + }else{ + //otherwise, just do nothing because we there is a paragraph break after us or something that can't be joined in. 
+ } + } + } + }else if( editDiff.state == DiffState.STATE_PASSING_2ND ){ + //This means that we are inserting a boundary. So we trim off the rest of the current word or string + //and add it as a new content location after this. Depending on if this is a begin or end boundary + //changes if we create a word or string after this. + //for insertions we get our index information from the reverse drag of references, so it is always within context of the last thing. + //Not like in the removals where sometimes the location is before the boundary and sometimes after the boundary. + if( editDiff.content.char == "<" ){ + //we are inserting the start of word boundary. So chop off the rest of the content and insert as a word. + splitPerfPiece( perf, editDiff.content.blockIndex, editDiff.content.contentIndex, editDiff.content.charIndex, true ); + }else if( editDiff.content.char == ">" ){ + //we are inserting the end of word boundary. So we chip of the rest of the current and insert it as intraword. + splitPerfPiece( perf, editDiff.content.blockIndex, editDiff.content.contentIndex, editDiff.content.charIndex, false ); + } + }// if we match a start of word boundary or end of word boundary big whoop. + } + } + } + + //now that we are done editing the verse we need to reindex it. + const perfVerse = pullVerseFromPerf( `${chapterNumber}:${verseNumber}`, perf, insertionIndex ); + if( perfVerse === undefined ){ + throw new Error( "Internal error. Edited verse missing from perf." ); + } + reindexPerfVerse( perfVerse!, false ); + + //now time to fix the alignments. 
// --- tail of editVerse (its body starts on the preceding lines) ---
    replaceAlignmentsInPerfInPlace( perf, chapterNumber, verseNumber, insertionIndex, savedAlignments );

    //const testExport2 = getAttributedVerseCharactersFromPerf( perf, {chapter:chapterNumber, verse:verseNumber}, false, insertionIndex) as string;

    //console.log( `Edited verse now looks like: "${testExport2}"` );

    //pop it up in vscode
    //vscode.window.showInformationMessage( `Edited verse ${chapterNumber}:${verseNumber} now looks like "${testExport2}"!` );
}

/**
 * Runs each queued edit action against the perf, in the order given.
 *
 * Return values of the helpers are ignored, so the helpers are relied on to
 * change the perf object they are handed. Actions whose `type` is not one of
 * the three cases below are silently skipped.
 *
 * @param unupdated_perf The perf document handed to every helper.
 * @param notebook_edit_actions The actions to apply; dispatched on `action.type`.
 * @param perf_index Not read by this function; kept so existing callers keep compiling.
 */
function executeNotebookEditActions( unupdated_perf : Perf, notebook_edit_actions: PerfEditAction[], perf_index: PerfReferenceSet ){

    for( const action of notebook_edit_actions ){

        // //Debug. 29:25
        // const testVerse = getAttributedVerseCharactersFromPerf( unupdated_perf, {chapter:29, verse:25}, false, perf_index.verses["29:25"]);
        // console.log( `At this point the verse reads "${testVerse}"` );

        switch( action.type ){
            case "createChapter":
                createChapter( unupdated_perf, action.chapterNumber, action.index );
                break;
            case "insertVerse":
                insertVerse( unupdated_perf, action.chapterNumber, action.verseNumber!, action.newVerseText!, action.index );
                break;
            case "edit":
                editVerse( unupdated_perf, action.chapterNumber, action.verseNumber!, action.newVerseText!, action.index );
                break;
        }

    }
}

/**
 * Produces the perf for the notebook in the active notebook editor.
 *
 * Steps, as visible here: collect the scripture content of the notebook, get
 * the perf attached to the notebook (or a new one), index it, turn the
 * differences into edit actions, apply them, store the perf back on the
 * notebook and return it.
 *
 * @returns The perf after the edit actions have been applied.
 * @throws Error when there is no active notebook editor.
 */
async function getPerfFromActiveNotebook() : Promise<Perf> {

    const notebookEditor = vscode.window.activeNotebookEditor;
    if (!notebookEditor) throw new Error('No active notebook editor found');
    const notebook = notebookEditor.notebook;

    //iterate through each cell in the notebook.
    const notebook_content = await collectScriptureDataFromNotebook(notebook);

    //NOTE(review): not awaited - presumably synchronous; confirm against its definition.
    const perf = getUnupdatedPerfFromNotebookOrMakeIt(notebook);

    const perf_index = getIndexedReferencesFromPerf(perf);

    const notebook_edit_actions = combineIndexWithContentIntoActions(notebook_content, perf_index, true, perf);

    executeNotebookEditActions( perf, notebook_edit_actions, perf_index );

    await updatePerfOnNotebook( notebook, perf );

    return perf;
}


/**
 * Converts the active notebook to USFM and writes it to `exportParameters.usfmSaveUri`.
 *
 * @param codex_filename Not read by this function; kept for the existing call site.
 * @param exportParameters Supplies the destination URI of the USFM file.
 */
async function doUsfmExport(codex_filename: string, exportParameters: UsfmExportParameters) {
    const perf = await getPerfFromActiveNotebook();

    const usfmData = perfToUsfm( perf );

    await vscode.workspace.fs.writeFile(exportParameters.usfmSaveUri, Buffer.from(usfmData));
}


/**
 * Appends a smiley face to the content of the first cell in the active notebook editor.
 *
 * Nothing is thrown: a missing editor, an empty notebook or a failed edit are
 * each reported through a VS Code error message instead.
 *
 * @returns A promise that resolves once the edit has been attempted.
 */
async function doSmileyToFirstCell() {
    const notebookEditor = vscode.window.activeNotebookEditor;
    if (!notebookEditor) {
        vscode.window.showErrorMessage('No active notebook editor found');
        return;
    }


    const notebook = notebookEditor.notebook;

    // Check if the notebook has at least one cell
    if (notebook.cellCount > 0) {
        const firstCell = notebook.cellAt(0);
        const updatedText = firstCell.document.getText() + ' 😊';

        // Create an edit to update the cell's content
        const edit = new vscode.WorkspaceEdit();
        edit.replace(firstCell.document.uri, new vscode.Range(0, 0, firstCell.document.lineCount, 0), updatedText);

        // Apply the edit
        const success = await vscode.workspace.applyEdit(edit);
        if (success) {
            vscode.window.showInformationMessage('Smiley appended to the first cell');
        } else {
            vscode.window.showErrorMessage('Failed to append smiley to the first cell');
        }
    } else {
        vscode.window.showErrorMessage('The notebook does not contain any cells');
    }
}


/**
 * Reads the given USFM files and converts each one to perf.
 *
 * @param usfmFiles The files to load; they are read in parallel.
 * @returns A map from each file's `fsPath` to the perf produced by `usfmToPerf`.
 */
async function readUsfmData( usfmFiles: vscode.Uri[] ) {
    //read them all in parallel
    const filenameToUsfmData: { [filename: string]: string} = Object.fromEntries(
        await Promise.all(
            usfmFiles.map(
                async usfmFile => {
                    const bytes = await vscode.workspace.fs.readFile(usfmFile);
                    //readFile hands back raw bytes. Calling toString() on them directly only gives
                    //text when the object happens to be a Buffer; a plain Uint8Array would give
                    //"85,83,70,..." instead. So decode as UTF-8 explicitly.
                    return [
                        usfmFile.fsPath,
                        Buffer.from(bytes).toString("utf8")
                    ];
                }
            )
        )
    );

    //now convert them all into perf. usfmToPerf is synchronous so there is nothing to await.
    const filenameToPerf = Object.fromEntries(
        Object.entries(filenameToUsfmData).map(
            ([filename, usfmData]) => [
                filename,
                usfmToPerf(usfmData)
            ]
        )
    );
    return filenameToPerf;
}

// export async function createProjectNotebooks({
//     shouldOverWrite = false,
//     books = undefined,
//     foldersWithUsfmToConvert = undefined,
// }: {
//     shouldOverWrite?: boolean;
//     books?: string[] | undefined;
//     foldersWithUsfmToConvert?: vscode.Uri[] | undefined;
// } = {}) {
//     const notebookCreationPromises = [];
//     let projectFileContent: ParsedUSFM[] | undefined = undefined;
//     if (foldersWithUsfmToConvert) {
//         projectFileContent = await importProjectAndConvertToJson(
//             foldersWithUsfmToConvert
//         );
//     }

//     const allBooks = books ? books : getAllBookRefs();
//     // Loop over all books and createCodexNotebook for each
//     for (const book of allBooks) {
//         /**
//          * One notebook for each book of the Bible. Each notebook has a code cell for each chapter.
//          * Each chapter cell has a preceding markdown cell with the chapter number, and a following
//          * markdown cell that says '### Notes for Chapter {chapter number}'
//          */
//         const cells: vscode.NotebookCellData[] = [];
//         const chapterHeadingText = `# Chapter`;

//         // Iterate over all chapters in the current book
//         for (const chapter of getAllBookChapterRefs(book)) {
//             // Generate a markdown cell with the chapter number
//             const cell = new vscode.NotebookCellData(
//                 vscode.NotebookCellKind.Markup,
//                 `${chapterHeadingText} ${chapter}`,
//                 "markdown"
//             );
//             cell.metadata = {
//                 type: CellTypes.CHAPTER_HEADING,
//                 data: {
//                     chapter: chapter,
//                 },
//             };
//             cells.push(cell);
//             const importedBook = projectFileContent?.find(
//                 (projectFile) => projectFile?.book?.bookCode === book
//             );

//             const verseRefText = importedBook?.chapters.find(
//                 (projectBookChapter) => projectBookChapter?.chapterNumber === chapter
//             )?.contents;
//             // Generate a code cell for the chapter
//             const numberOfVrefsForChapter =
//                 vrefData[book].chapterVerseCountPairings[chapter];
//             const vrefsString = getAllVrefs(
//                 book,
//                 chapter,
//                 numberOfVrefsForChapter,
//                 verseRefText
//             );

//             cells.push(
//                 new vscode.NotebookCellData(
//                     vscode.NotebookCellKind.Code,
//                     vrefsString,
//                     "scripture"
//                 )
//             );

//             // Generate a markdown cell for notes for the chapter
//             cells.push(
//                 new vscode.NotebookCellData(
//                     vscode.NotebookCellKind.Markup,
//                     `### Notes for Chapter ${chapter}`,
//                     "markdown"
//                 )
//             );
//         }
//         // Create a notebook for the current book
//         const serializer = new CodexContentSerializer();
//         const notebookData = new vscode.NotebookData(cells);

//         // const project = await getProjectMetadata();
//         const notebookCreationPromise = serializer
//             .serializeNotebook(
//                 notebookData,
//                 new vscode.CancellationTokenSource().token
//             )
//             .then((notebookFile) => {
//                 // Save the notebook using generateFiles
//                 const filePath = `files/target/${book}.codex`;
//                 return generateFile({
//                     filepath: filePath,
//                     fileContent: notebookFile,
//                     shouldOverWrite,
//                 });
//             });
//         notebookCreationPromises.push(notebookCreationPromise);
//     }
//     await Promise.all(notebookCreationPromises);
// }


/**
 * Builds one `.codex` notebook per imported perf and writes them under `./files/target/`.
 *
 * For every chapter a markdown heading cell, one "scripture" code cell holding
 * all of the chapter's verses (one `<book> <chapter>:<verse> <text>` line per
 * verse) and a markdown notes cell are produced. The heading cell of chapter 1
 * additionally carries the whole perf in its metadata.
 *
 * @param filenameToPerf Map from the source file path to its perf.
 * @throws Error when no workspace folder is available, or when the user
 *         declines to overwrite an already existing notebook.
 */
async function generateNotebooks( filenameToPerf: { [filename: string]: Perf } ) {

    const workspaceFolder = getWorkSpaceFolder();

    if (!workspaceFolder) {
        throw new Error("No workspace folder found");
    }


    const filenameToCells: { [filename: string]: vscode.NotebookCellData[] } = {};

    let currentFilename = "";
    let currentChapter = -1;
    let currentVerse = -1;
    let currentChapterCell : vscode.NotebookCellData | undefined = undefined;

    //now generate the notebooks.
    for( const [filename, perf] of Object.entries(filenameToPerf) ){

        //The name of the source file without its directory and without anything after the first dot.
        //path.basename is used instead of splitting on "/" because the importer keys this map by
        //fsPath, which uses "\" as the separator on Windows.
        const sourceBasename = path.basename(filename).split( "." )[0] || "";

        //Only the characters which are neither "-" nor digits.
        //https://stackoverflow.com/questions/175739/built-in-way-in-javascript-to-check-if-a-string-is-a-valid-number
        const strippedFilename = sourceBasename.split('').filter( (char) => char !== "-" && isNaN(char as unknown as number) ).join('');

        //bookCode followed by toc3 followed by h followed by toc2 followed by the filename with nothing except for letters
        const bookAbbreviation = perf.metadata?.document?.bookCode || perf.metadata?.document?.toc3 ||
            perf.metadata?.document?.h || perf.metadata?.document?.toc2 || strippedFilename;

        //remove path and add .codex. This is the same for every verse of the file so it is computed once.
        const notebookFilename = `./files/target/${sourceBasename}.codex`;
        const notebookFilenameFullPath = path.join( workspaceFolder, notebookFilename );

        const references = getIndexedReferencesFromPerf(perf);

        for( const [stringReference, startIndex] of Object.entries(references.verses) ){
            const [chapter, verse] = stringReference.split(":").map(Number);
            const reference = {chapter,verse};

            const verseText = getAttributedVerseCharactersFromPerf( perf, reference, false, startIndex ) as string;

            //Check if the file already exists and if it does confirm with the user through vscode that the overwrite is ok.
            //This only happens on the first verse of each notebook.
            if( currentFilename !== notebookFilename ){
                let fileExists = false;
                try{
                    await vscode.workspace.fs.stat(vscode.Uri.file(notebookFilenameFullPath));
                    fileExists = true;
                }catch(err){
                    //stat rejects when the file can not be found, which is the normal case for a fresh import.
                    fileExists = false;
                }
                if( fileExists ){
                    const overwrite = await vscode.window.showWarningMessage(
                        `Overwrite ${notebookFilename}?`,
                        { modal: true },
                        "Yes");

                    if( !overwrite ) throw new Error("Overwrite cancelled");
                }
            }

            //If the chapter or filename has changed then add the notes to the previous chapter if it exists.
            if( (currentChapter != -1 && ((currentChapter !== reference.chapter) || (currentFilename && currentFilename !== notebookFilename))) ){
                filenameToCells[currentFilename].push(
                    new vscode.NotebookCellData(
                        vscode.NotebookCellKind.Markup,
                        `### Notes for Chapter ${currentChapter}`,
                        "markdown"
                    )
                );
            }


            //if we are in a new filename, start a new cell group.
            if( !(notebookFilename in filenameToCells) ) filenameToCells[notebookFilename] = [];
            const cells = filenameToCells[notebookFilename];

            //If we are in a new chapter, create the chapter header.
            if( currentChapter != reference.chapter || (currentFilename && currentFilename !== notebookFilename)){
                const newCell = new vscode.NotebookCellData(
                    vscode.NotebookCellKind.Markup,
                    `# Chapter ${reference.chapter}`,
                    "markdown"
                );
                newCell.metadata = {
                    type: CellTypes.CHAPTER_HEADING,
                    data: {
                        chapter: "" + reference.chapter
                    }
                };
                if( reference.chapter == 1 ){
                    newCell.metadata.perf = perf;
                }
                cells.push(newCell);

                //forces a new scripture cell to be started below.
                currentChapterCell = undefined;
            }

            //if we don't have a current cell create one.
            if( currentChapterCell === undefined ){
                currentChapterCell = new vscode.NotebookCellData(
                    vscode.NotebookCellKind.Code,
                    "",
                    "scripture"
                );
                cells.push( currentChapterCell );
            }else{
                //otherwise add a newline.
                currentChapterCell!.value += `\n`;
            }


            currentFilename = notebookFilename;
            currentChapter = reference.chapter;
            currentVerse = reference.verse;

            const refString = `${bookAbbreviation} ${currentChapter}:${currentVerse}`;
            const verseContent = importHacks(verseText);


            currentChapterCell!.value += `${refString} ${verseContent}`;

        }
    }

    //close out the last one.
    if( currentFilename && currentChapter != -1 ){
        filenameToCells[currentFilename].push(
            new vscode.NotebookCellData(
                vscode.NotebookCellKind.Markup,
                `### Notes for Chapter ${currentChapter}`,
                "markdown"
            )
        );
    }

    //now create the notebooks all in parallel.
    const serializer = new CodexContentSerializer();
    await Promise.all(
        Object.entries(filenameToCells).map(
            async ([filePath, cells]) => {
                const notebookData = new vscode.NotebookData(cells);

                return serializer.serializeNotebook(
                    notebookData,
                    new vscode.CancellationTokenSource().token
                ).then((notebookFile) => {
                    // Save the notebook using generateFiles
                    //The user has already confirmed any overwrite above.
                    return generateFiles({
                        filepath: filePath,
                        fileContent: notebookFile,
                        shouldOverWrite: true,
                    });
                });
            }
        )
    );


}

/**
 * Registers the `codex-editor-extension.importUsfm` and
 * `codex-editor-extension.exportUsfm` commands and adds their disposables to
 * the extension context.
 *
 * @param context The extension context whose `subscriptions` own the commands.
 */
export function registerUsfmImporter(context: vscode.ExtensionContext) {
    console.log('The importUsfm plugin is now active!');

    // The command ids must match the command fields in package.json
    const import_disposable = vscode.commands.registerCommand('codex-editor-extension.importUsfm', async () => {
        const importParameters = await getImportParameters();

        //read the usfm data to a dictionary which maps from the filename to the loaded perf.
        const filenameToPerf = await readUsfmData(importParameters.usfmFiles);

        await generateNotebooks(filenameToPerf);

        // Display a message box to the user
        vscode.window.showInformationMessage('Usfm import complete.');
    });
    context.subscriptions.push(import_disposable);

    const export_disposable = vscode.commands.registerCommand('codex-editor-extension.exportUsfm', async () => {

        const notebookEditor = vscode.window.activeNotebookEditor;
        if (!notebookEditor) {
            vscode.window.showErrorMessage('No active notebook editor found');
            return;
        }


        const codex_filename = notebookEditor.notebook.uri.fsPath;

        if( !codex_filename ){
            vscode.window.showInformationMessage( "No document open" );
            return;
        }

        const codex_basename = path.basename(codex_filename);

        //make sure the extension of the filename is .codex.
        if( !codex_basename.endsWith(".codex") ){
            vscode.window.showInformationMessage( "Filename must be a .codex file" );
            return;
        }

        vscode.window.showInformationMessage( `Exporting ${codex_basename}` );

        const exportParameters = await getExportParameters(codex_filename);

        await doUsfmExport(codex_filename, exportParameters);

        vscode.window.showInformationMessage( `Finished exporting ${codex_basename}` );
    });
    context.subscriptions.push(export_disposable);
}

diff --git a/src/usfmStuff/utils.ts b/src/usfmStuff/utils.ts
new file mode 100644
index 000000000..397f6e9ba
--- /dev/null
+++ b/src/usfmStuff/utils.ts
@@ -0,0 +1,1281 @@

//@ts-expect-error This library doesn't have types.
import {Proskomma} from 'proskomma-core';
//@ts-expect-error This library doesn't have types.
import {PipelineHandler} from 'proskomma-json-tools';

import { Token } from 'wordmap-lexer';
import { Alignment, Ngram, Prediction, Suggestion } from 'wordmap';

// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
import * as stringTokenizer from "string-punctuation-tokenizer";
import { TAttributedString } from './customizedJLDiff';

export interface TStrippedUsfm{
    version: number,
    text: string
}
export interface TAlignmentData{
    version: number,
    perf: any,
}

export interface OptionalInternalUsfmJsonFormat{
    strippedUsfm?: TStrippedUsfm,
    alignmentData?: TAlignmentData,
}
export interface InternalUsfmJsonFormat{
    strippedUsfm: TStrippedUsfm,
    alignmentData: TAlignmentData,
}

export interface UsfmMessage{
    command: string,
    content?: OptionalInternalUsfmJsonFormat,
    requestId?: number,
    commandArg?: any,
    response?: any,
    error?: any,
}

//The perf related types are not official, I add items to these types as
//I verify they exist.
interface PerfAttributes{
    number?: string,
    "x-occurrence"?: string[],
    "x-occurrences"?: string[],
    "x-content"?: string[],
    "x-lemma"?: string[],
    lemma?: string[],
    "x-morph"?: string[],
    "x-strong"?: string[],
    strong?: string[],
}

export interface PerfContent{
    type?: string,
    subtype?: string,
    atts?: PerfAttributes,
    content?: string[],
}

interface PerfBlock{
    type?: string,
    subtype?: string,
    content?: (PerfContent | string)[],
}


//Define an interface PerfVerse which is an array of PerfBlock.
+export interface PerfVerse extends Array {} + +interface PerfAlignment{ + +} + +interface PerfSequence{ + blocks?: PerfBlock[], +} + +export interface PerfMetadataDocument{ + bookCode?: string, + h?: string, + toc?: string, + toc2?: string, + toc3?: string, +} + +interface PerfMetadata{ + document?: PerfMetadataDocument, +} + +export interface Perf{ + metadata?: PerfMetadata, + sequences?: { [key: string]:PerfSequence}, + main_sequence_id?: string, +} + +export const SECONDARY_WORD = 'secondaryWord'; +export const PRIMARY_WORD = 'primaryWord'; + + +//Copied this type from alignments-transferer. Commenting stuff in when they get touched. + +export interface TWord{ + type: string; + + occurrence?: number; + occurrences?: number; + + // position?: number; + + // //Sometimes it is word sometimes it is text. + // word?: string; //usfm format uses word + text?: string; //alignment uses text. + + // content?: string; + // endTag?: string; + lemma?: string; + morph?: string; + // strongs?: string; //something was using strongs, I forget + strong?: string; //alignment dialog uses strong + // tag?: string; + + // children?: TWord[]; + + disabled?: boolean; //Makes it look used in the word bank. 
+ + index?: number; +} + +export interface TWordAlignerAlignmentResult{ + targetWords: TWord[]; + verseAlignments: TSourceTargetAlignment[]; +} + + +export interface TSourceTargetAlignment{ + sourceNgram: TWord[]; + targetNgram: TWord[]; +} + +export interface TSourceTargetPrediction{ + alignment: TSourceTargetAlignment; + confidence: number; +} + +export interface TAlignmentSuggestion{ + predictions: TSourceTargetPrediction[]; + confidence: number; +} +/* + export interface TSourceTargetSuggestion{ + alignment: TSourceTargetAlignment; + confidence: number; + } + + + interface TTopBottomAlignment{ + topWords: TWord[]; + bottomWords: TWord[]; + } + + export interface TAlignerData{ + wordBank: TWord[]; + alignments: TSourceTargetAlignment[]; + } + +*/ + interface TReference{ + chapter: number; + verse: number; + } /* + + interface TContextId{ + reference: TReference; + } + + interface TUsfmVerse{ + verseObjects: TWord[]; + } + + type TUsfmChapter = {[key:string]:TUsfmVerse}; + + interface TUsfmHeader{ + tag: string; + content: string; + } + + interface TUsfmBook{ + headers: TUsfmHeader[]; + chapters: {[key:string]:TUsfmChapter}; + } + + export interface TWordAlignerAlignmentResult{ + targetWords: TWord[]; + verseAlignments: TSourceTargetAlignment[]; + } + + + //I don't need this react interface declared on the server side of the project. 
+ + + // interface SuggestingWordAlignerProps { + // style: {[key: string]: string }; + // verseAlignments: TSourceTargetAlignment; + // targetWords: TWord[]; + // translate: (key:string)=>string; + // contextId: TContextId; + // targetLanguage: string; + // targetLanguageFont: {}; + // sourceLanguage: string; + // showPopover: (PopoverTitle: string, wordDetails: string, positionCoord: string, rawData: any) => void; + // lexicons: {}; + // loadLexiconEntry: (arg:string)=>{[key:string]:string}; + // onChange: (results: TWordAlignerAlignmentResult) => void; + // suggester: ((sourceSentence: string | Token[], targetSentence: string | Token[], maxSuggestions?: number, manuallyAligned: Alignment[] = []) => Suggestion[]) | null; + // } + // export class SuggestingWordAligner extends React.Component{} + + //function removeUsfmMarkers(verse: UsfmVerse):string; + //function usfmVerseToJson(); + + + + + export module usfmHelpers { + export function removeUsfmMarkers(targetVerseText: string): string; + } + + export module AlignmentHelpers{ + export function getWordListFromVerseObjects( verseObjects: TWord[] ): Token[]; + export function markTargetWordsAsDisabledIfAlreadyUsedForAlignments(targetWordList: Token[], alignments: TSourceTargetAlignment[]):TWord[]; + export function addAlignmentsToVerseUSFM( wordBankWords: TWord[], verseAlignments: any, targetVerseText: string ): string; + //I see that Algnments is not spelled correctly, it is this way in the library. 
+ export function areAlgnmentsComplete( targetWords: TWord[], verseAlignments: TSourceTargetAlignment[] ): boolean; + } + */ + +export interface TTrainingAndTestingData { + alignments: { + [key: string]: { + targetVerse: TWord[]; + sourceVerse: TWord[]; + alignments: TSourceTargetAlignment[]; + } + }; + corpus: { + [key: string]: { + sourceTokens: TWord[]; + targetTokens: TWord[]; + } + }; +} + +export function deepCopy(obj: any): any { + return JSON.parse(JSON.stringify(obj)); +} + + +export function usfmToPerf( usfm: string ): Perf { + const pk = new Proskomma(); + pk.importDocument({lang: "xxx", abbr: "yyy"}, "usfm", usfm); + return JSON.parse(pk.gqlQuerySync("{documents {perf}}").data.documents[0].perf); +} + +export function pullVerseFromPerf( reference: string, perf: Perf, index: TBlockContentIndex | undefined = undefined ): PerfVerse | undefined { + if( !reference ) return undefined; + + const referenceParts = reference.split(":"); + + if( referenceParts.length != 2 ) return undefined; + + const chapter : string = referenceParts[0]; + const verse : string = referenceParts[1]; + + + let currentChapter : string = "-1"; + let currentVerse : string = "-1"; + + const collectedContent : any[] = []; + + let firstIteration = true; + + //first iterate the chapters. + //perf.sequences[perf.main_sequence_id].blocks is an array. + for( let blockIndex = 0; blockIndex < (perf?.sequences?.[perf?.main_sequence_id ?? ""]?.blocks?.length ?? 0); blockIndex++ ){ + let block = perf?.sequences?.[perf?.main_sequence_id ?? ""]?.blocks?.[blockIndex]; + + if( block != undefined && block.type == 'paragraph' ){ + for( let contentIndex = 0; contentIndex < (block?.content?.length ?? 0); contentIndex++ ){ + let content = block?.content?.[contentIndex]; + + //see if we were passed in a cheater starter index. 
+ if( firstIteration && index !== undefined ){ + firstIteration = false; + blockIndex = index.b; + contentIndex = index.c; + currentChapter = chapter; + currentVerse = verse; + block = perf?.sequences?.[perf?.main_sequence_id ?? ""]?.blocks?.[blockIndex]; + content = block?.content?.[contentIndex]; + } + + if( typeof(content) == 'object' && content.type == 'mark' ){ + if( content.subtype == 'chapter' ){ + currentChapter = content?.atts?.number ?? "-1"; + }else if( content.subtype == 'verses' ){ + currentVerse = content?.atts?.number ?? "-1"; + } + //if we have changed the reference and we have already + //collected content, then we can stop scanning and just return + if( collectedContent.length > 0 && (currentChapter != chapter || currentVerse != verse) ){ + return collectedContent; + } + }else{ + //if we are in the correct reference then collect the content. + if( currentChapter == chapter && currentVerse == verse ){ + collectedContent.push( content ); + } + } + } + } + } + + return collectedContent; +} + + +export function pullVersesFromPerf( perf: Perf ): { [key: string]: PerfVerse } { + let currentChapter : string = "-1"; + let currentVerse : string = "-1"; + + const collectedContent : { [key: string]: PerfVerse } = {}; + + //first iterate the chapters. + //perf.sequences[perf.main_sequence_id].blocks is an array. + for( const [blockIndex, block] of (perf?.sequences?.[perf?.main_sequence_id ?? ""]?.blocks ?? []).entries() ){ + if( block.type == 'paragraph' ){ + for( const [contentIndex, content] of (block.content ?? []).entries() ){ + if( typeof(content) == 'object' && content.type == 'mark' ){ + if( content.subtype == 'chapter' ){ + currentChapter = content?.atts?.number ?? "-1"; + }else if( content.subtype == 'verses' ){ + currentVerse = content?.atts?.number ?? 
/**
 * Produces a copy of `perf` in which the content of one verse is swapped for `perfVerse`.
 *
 * The main sequence is walked paragraph by paragraph while chapter and verse marks are
 * tracked. Existing non-mark content inside the requested verse is left out, and
 * `perfVerse` is placed directly behind the verse mark that opens the verse. Blocks
 * that are not paragraphs are carried over untouched.
 *
 * @param perf - The document to copy; the top-level object, its sequences map and the
 *   paragraph blocks are rebuilt rather than edited.
 * @param perfVerse - The replacement content for the verse.
 * @param reference - The verse to replace, written as "chapter:verse".
 * @returns The rebuilt document, or `undefined` when `reference` is empty or is not of
 *   the form "chapter:verse".
 */
export function replacePerfVerseInPerf( perf :Perf, perfVerse: PerfVerse, reference : string ){
    if( !reference ) return undefined;

    const pieces = reference.split(":");
    if( pieces.length != 2 ) return undefined;
    const [wantedChapter, wantedVerse] = pieces;

    const mainSequenceId = perf?.main_sequence_id ?? "";

    let chapterCursor : string = "-1";
    let verseCursor : string = "-1";
    const insideWantedVerse = () => chapterCursor == wantedChapter && verseCursor == wantedVerse;

    const rebuiltBlocks : any[] = [];
    for( const block of perf?.sequences?.[mainSequenceId]?.blocks ?? [] ){
        if( block.type != 'paragraph' ){
            rebuiltBlocks.push( block );
            continue;
        }

        const rebuiltContent = [];
        for( const piece of block.content ?? [] ){
            if( !(typeof(piece) == 'object' && piece.type == 'mark') ){
                //Old content of the wanted verse is dropped so the replacement is not doubled.
                if( !insideWantedVerse() ) rebuiltContent.push( piece );
                continue;
            }

            //Marks are always kept; the replacement has to follow the verse mark.
            rebuiltContent.push( piece );
            if( piece.subtype == 'chapter' ){
                chapterCursor = piece.atts?.number ?? "-1";
            }else if( piece.subtype == 'verses' ){
                verseCursor = piece.atts?.number ?? "-1";
                if( insideWantedVerse() ) rebuiltContent.push( ...perfVerse );
            }
        }

        rebuiltBlocks.push( { ...block, content: rebuiltContent } );
    }

    return {
        ...perf,
        sequences: {
            ...perf.sequences,
            [mainSequenceId]: {
                ...perf?.sequences?.[mainSequenceId],
                blocks: rebuiltBlocks
            }
        }
    };
}
+// * @param sentenceCharLength - the length of the sentence in characters +// */ +// export function updateTokenLocations(inputTokens : Token[], sentenceCharLength : number = -1) : void { +// if (sentenceCharLength === -1) { +// sentenceCharLength = inputTokens.map( t => t.toString() ).join(" ").length; +// } + +// //const tokens: {text: string, position: number, characterPosition: number, sentenceTokenLen: number, sentenceCharLen: number, occurrence: number}[] = []; +// let charPos = 0; +// let tokenCount = 0; +// const occurrenceIndex : {[key: string]: number }= {}; +// for (const inputToken of inputTokens) { +// if (!occurrenceIndex[inputToken.toString()]) { +// occurrenceIndex[inputToken.toString()] = 0; +// } +// occurrenceIndex[inputToken.toString()] += 1; +// (inputToken as any).inputToken.tokenPos = tokenCount; +// (inputToken as any).charPos = charPos; +// (inputToken as any).sentenceTokenLen = inputTokens.length; +// (inputToken as any).sentenceCharLen = sentenceCharLength; +// (inputToken as any).tokenOccurrence = occurrenceIndex[inputToken.toString()]; +// tokenCount++; +// charPos += inputToken.toString().length; +// } + +// // Finish adding occurrence information +// for( const t of inputTokens){ +// (t as any).tokenOccurrences = occurrenceIndex[t.toString()]; +// } +// } + + + +export function wordmapTokenToTWord( token: Token, type: string ): TWord { + return { + type, + occurrence: token.occurrence, + occurrences: token.occurrences, + text: token.toString(), + lemma: token.lemma, + morph: token.morph, + strong: token.strong, + disabled: false, + index: token.position, + }; +} + +export function tWordToWordmapToken( tWord: TWord ): Token { + return new Token( tWord ); +} + +export function wordMapAlignmentToTSourceTargetAlignment( alignment: Alignment ): TSourceTargetAlignment { + return { + sourceNgram: alignment.sourceNgram.getTokens().map( token => wordmapTokenToTWord( token, PRIMARY_WORD ) ), + targetNgram: 
alignment.targetNgram.getTokens().map( token => wordmapTokenToTWord( token, SECONDARY_WORD) ), + }; +} + +export function tSourceTargetAlignmentToWordmapAlignment( tSourceTargetAlignment: TSourceTargetAlignment ): Alignment { + return new Alignment( + new Ngram( tSourceTargetAlignment.sourceNgram.map( tWordToWordmapToken ) ), + new Ngram( tSourceTargetAlignment.targetNgram.map( tWordToWordmapToken ) ) + ); +} + +export function tSourceTargetPredictionToWordmapPrediction( tSourceTargetPrediction: TSourceTargetPrediction ): Prediction { + const prediction: Prediction = new Prediction( tSourceTargetAlignmentToWordmapAlignment(tSourceTargetPrediction.alignment) ); + prediction.setScore( "confidence", tSourceTargetPrediction.confidence ); + return prediction; +} + +export function wordmapPredictionToTSourceTargetPrediction( prediction: Prediction ): TSourceTargetPrediction { + return { + alignment: wordMapAlignmentToTSourceTargetAlignment( prediction.alignment ), + confidence: prediction.getScore("confidence") + }; +} + +export function tAlignmentSuggestionToWordmapSuggestion( tAlignmentSuggestion: TAlignmentSuggestion ): Suggestion { + const predictions: Prediction[] = tAlignmentSuggestion.predictions.map( tSourceTargetPredictionToWordmapPrediction ); + const suggestion: Suggestion = new Suggestion( ); + //The tokens in the prediction don't have their index set so using the addPrediction gets the alignments all of order. 
/**
 * Converts one PERF content entry into a word record.
 *
 * A plain string becomes a word whose `text` is that string. For an object the
 * attributes are read from `atts`: occurrence counters are parsed as integers, the
 * text comes from `x-content` and is overridden by the entry's own `content`,
 * `lemma`/`strong` override their `x-` prefixed counterparts, and `x-morph` values
 * are joined with commas. Fields whose attribute is absent are left unset.
 *
 * @param perfContent - A string or a PERF content object (wrapper or milestone).
 * @param type - Stored as the `type` of the resulting word.
 * @returns The word record.
 */
function perfContentToTWord( perfContent: any | string, type: string ): TWord {
    const word : TWord = { type };

    if( typeof perfContent == "string" ){
        word.text = perfContent;
        return word;
    }

    const atts = perfContent?.atts;
    const ownContent = perfContent?.content;

    if( atts?.["x-occurrence"] ){
        word.occurrence = parseInt( atts["x-occurrence"].join(" ") );
    }
    if( atts?.["x-occurrences"] ){
        word.occurrences = parseInt( atts["x-occurrences"].join(" ") );
    }

    //The entry's own content wins over the x-content attribute when both exist.
    if( atts?.["x-content"] ){
        word.text = atts["x-content"].join(" ");
    }
    if( ownContent ){
        word.text = ownContent.join(" ");
    }

    //For lemma and strong the unprefixed attribute is applied last and therefore wins.
    for( const key of ["x-lemma", "lemma"] ){
        if( atts?.[key] ) word.lemma = atts[key].join(" ");
    }
    if( atts?.["x-morph"] ){
        word.morph = atts["x-morph"].join(",");
    }
    for( const key of ["x-strong", "strong"] ){
        if( atts?.[key] ) word.strong = atts[key].join(" ");
    }

    return word;
}
+ inMapping = false; + } + } + //recompute occurrence information if it doesn't exist. + if( wrappedWords.length > 0 && (!wrappedWords[0].occurrence || reindexOccurrences) ){ + wrappedWords = computeOccurrenceInformation( wrappedWords ); + } + return wrappedWords; +} + +export function extractAlignmentsFromPerfVerse( perfVerse: PerfVerse ): TSourceTargetAlignment[] { + const alignments : TSourceTargetAlignment[] = []; + const sourceStack : any[] = []; + const targetStack : any[] = []; + + //we need to stash alignments as we make them so that further words that get + //added to them can get poked into existing ones. + const sourceNgramHashToAlignment = new Map(); + + let targetIndex = 0; + for( const content of perfVerse ){ + + if( typeof(content) == 'object' && content.type == "start_milestone" && content.subtype == "usfm:zaln" ){ + //we can't index the source words right now because they are out of order. + //we will do it later when the alignments are supplemented with the unused source words. + sourceStack.push( perfContentToTWord(content, PRIMARY_WORD) ); + + //If there are any target words then just drop them because they aren't part of this + //group. 
/**
 * Builds the lookup key of a word from its text and its occurrence counters.
 *
 * @param word - The word to identify.
 * @returns "text-occurrence-occurrences"; missing fields appear as "undefined".
 */
function hashWordToString( word: TWord ): string {
    return `${word.text}-${word.occurrence}-${word.occurrences}`;
}

/**
 * Builds the lookup key of an n-gram by joining the keys of its words with "/".
 *
 * Callers look alignments up in a map and pass the possibly missing `sourceNgram`
 * straight in, so a missing n-gram is part of the contract and yields `undefined`.
 *
 * @param ngram - The words of the n-gram, or `undefined`.
 * @returns The joined key ("" for an empty n-gram), or `undefined` when no n-gram was given.
 */
function hashNgramToString( ngram: TWord[] ): string;
function hashNgramToString( ngram: TWord[] | undefined ): string | undefined;
function hashNgramToString( ngram: TWord[] | undefined ): string | undefined {
    return ngram?.map( ( word: TWord ) => hashWordToString( word ) ).join("/");
}
+ const sourceWordHashToExistsBool = alignments.reduce( (acc:any, cur:any) => { + cur.sourceNgram.forEach( ( word :any ) => { + acc[ hashWordToString( word ) ] = true; + }); + return acc; + }, {}); + + //now create an array of the sourceWords which are not represented. + const newSourceWords = sourceWords.filter( ( word : any ) => { + return !( hashWordToString( word ) in sourceWordHashToExistsBool ); + }); + + //now create bogus alignments for the new source words. + const newAlignments = newSourceWords.map( ( word : any ) => { + //return a bogus alignment + return { + sourceNgram: [ word ], + targetNgram: [] + }; + }); + + //Now create a new list which has both the new alignments and the old alignments + const combinedAlignments = alignments.concat( newAlignments ); + + //Get the index set on all the source words in the alignment. + const sourceIndexedAlignments = combinedAlignments.map( ( alignment : any, index : number ) => { + const indexedSourceNgram = alignment.sourceNgram.map( ( sourceWord : any ) => { + return { + ...sourceWord, + index: sourceWordHashToSourceWord[ hashWordToString( sourceWord ) ]?.index ?? -1 + }; + }); + return { + ...alignment, + sourceNgram: indexedSourceNgram + }; + }); + + //now sort the alignment based on index of the first source word. + sourceIndexedAlignments.sort( ( a : any, b : any ) => { + return a.sourceNgram[0].index - b.sourceNgram[0].index; + }); + + //now give each alignment an index. 
/**
 * Rebuilds a verse with its alignment milestones replaced by `newAlignments`.
 *
 * Every existing `usfm:zaln` start/end milestone is dropped. The remaining content is
 * replayed in order. Whenever a `usfm:w` word belongs to a different source n-gram than
 * the word before it, the open range is closed and one start milestone per source word
 * of the new n-gram is emitted in front of the word. Closing a range places its end
 * milestones directly behind the last word emitted so far. Words that appear in no
 * alignment are left bare.
 *
 * Target words are matched to alignments by text/occurrence/occurrences, so the
 * occurrence attributes in the verse have to agree with those in `newAlignments`.
 * Alignments that span several verses are not handled.
 *
 * @param perfVerse - The verse content; the array itself is not modified.
 * @param newAlignments - The alignments to encode into the verse.
 * @returns A new content array carrying the new milestones.
 */
export function replaceAlignmentsInPerfVerse( perfVerse: PerfVerse, newAlignments: TSourceTargetAlignment[] ): PerfVerse{
    //A verse mixes plain strings (punctuation, whitespace) with objects, and the `in`
    //operator throws on a string, so every structural test first makes sure it is
    //looking at an object.
    const matches = ( perfContent: unknown, types: string[], subtype: string ): boolean => {
        if( typeof perfContent !== "object" || perfContent === null ) return false;
        const candidate = perfContent as { type?: unknown; subtype?: unknown };
        return typeof candidate.type === "string" && types.includes( candidate.type ) && candidate.subtype === subtype;
    };
    const isAlignmentMilestone = ( perfContent: unknown ) => matches( perfContent, ["start_milestone", "end_milestone"], "usfm:zaln" );
    const isWord = ( perfContent: unknown ) => matches( perfContent, ["wrapper"], "usfm:w" );

    //Only attributes that actually carry a value are written. A key that is present
    //but undefined would otherwise be serialized as the text "undefined".
    const buildStartMilestone = ( sourceToken: TWord ) => {
        const atts : { [key: string]: string[] } = {};
        if( sourceToken.strong != null ){ atts["x-strong" ] = [ "" + sourceToken.strong ]; }
        if( sourceToken.lemma != null ){ atts["x-lemma" ] = [ "" + sourceToken.lemma ]; }
        if( sourceToken.morph != null ){ atts["x-morph" ] = ("" + sourceToken.morph).split(","); }
        if( sourceToken.occurrence != null ){ atts["x-occurrence" ] = [ "" + sourceToken.occurrence ]; }
        if( sourceToken.occurrences != null ){ atts["x-occurrences"] = [ "" + sourceToken.occurrences ]; }
        if( sourceToken.text != null ){ atts["x-content" ] = [ "" + sourceToken.text ]; }
        const newStartMilestone : any = {
            type: "start_milestone",
            subtype: "usfm:zaln",
            atts
        };
        return newStartMilestone;
    };

    const result : PerfVerse = [];
    const withoutOldAlignments = perfVerse.filter( ( perfContent ) => !isAlignmentMilestone( perfContent ) );

    //Describes the source n-gram that is currently open so we know when it has to change.
    let currentSourceAlignmentHash = "";
    let currentSourceAlignmentLength = 0;

    //Map every target word to the alignment that contains it.
    const targetWordHashToAlignment = new Map<string, TSourceTargetAlignment>();
    for( const alignment of newAlignments ){
        for( const targetWord of alignment.targetNgram ){
            targetWordHashToAlignment.set( hashWordToString( targetWord ), alignment );
        }
    }

    const closeSourceRange = () => {
        if( currentSourceAlignmentLength === 0 ) return;

        //Rather than appending at the very end, look backwards for the last word and
        //close the range right behind it, ahead of trailing punctuation or whitespace.
        let lastWordIndex = result.length - 1;
        while( lastWordIndex >= 0 && !isWord( result[lastWordIndex] ) ){
            lastWordIndex--;
        }

        //One end milestone per start milestone that was opened.
        const endMilestones = Array.from( { length: currentSourceAlignmentLength }, () : any => ({
            type: "end_milestone",
            subtype: "usfm:zaln"
        }));
        result.splice( lastWordIndex + 1, 0, ...endMilestones );
    };

    for( const perfContent of withoutOldAlignments ){
        //Only wrapped words can open or close a range; everything else is copied through.
        if( isWord( perfContent ) ){
            const relevantAlignment = targetWordHashToAlignment.get( hashWordToString( perfContentToTWord( perfContent, SECONDARY_WORD ) ) );
            const relevantHash = relevantAlignment ? (hashNgramToString( relevantAlignment.sourceNgram ) ?? "") : "";

            if( currentSourceAlignmentHash != relevantHash ){
                closeSourceRange();

                if( relevantAlignment ){
                    for( const sourceToken of relevantAlignment.sourceNgram ){
                        result.push( buildStartMilestone( sourceToken ) );
                    }
                    currentSourceAlignmentHash = relevantHash;
                    currentSourceAlignmentLength = relevantAlignment.sourceNgram.length;
                }else{
                    currentSourceAlignmentHash = "";
                    currentSourceAlignmentLength = 0;
                }
            }
        }

        result.push( perfContent );
    }

    //Close whatever range is still open at the end of the verse.
    closeSourceRange();

    return result;
}
0; + } + } + //take out the old source alignment + //by inserting in after lastWordIndex + for( let i = 0; i < currentSourceAlignmentLength; i++ ){ + const newEndMilestone : any = { + type: "end_milestone", + subtype: "usfm:zaln" + }; + perf.sequences?.[perf.main_sequence_id ?? ""]?.blocks?.[lastWordIndex.b]?.content?.splice( lastWordIndex.c + 1, 0, newEndMilestone ); + + //now if we are in the same block, inc our index. + if( lastWordIndex.b == contentIterator.b ) contentIterator.c++; + } + }; + + + //iterate from the index specified until we hit a verse or chapter mark which is not what we want. + //while we go drop all old alignments and insert the new ones. + //We iterate with a block and content index and whenever we remove or add something we will + //modify the iterator in place as well. + blockLoop: for( contentIterator.b = index.b; contentIterator.b < (perf.sequences?.[perf.main_sequence_id ?? ""]?.blocks?.length ?? 0); contentIterator.b++ ){ + + const contentArray = perf.sequences?.[perf.main_sequence_id ?? ""]?.blocks?.[contentIterator.b]?.content ?? []; + for( contentIterator.c = ((contentIterator.b == index.b)?index.c:0); contentIterator.c < contentArray.length; contentIterator.c++ ){ + const content = contentArray[contentIterator.c]; + + if( typeof(content) == "object" && ("type" in content)) { + if( content.type == "mark"){ + if( ("subtype" in content) && content.subtype == "verses" ){ + if( parseInt(content?.atts?.number ?? '-1',10) != verse ) break blockLoop; //once we hit the wrong verse we are done. + }else if( ("subtype" in content) && content.subtype == "chapter" ){ + if( parseInt(content?.atts?.number ?? '-1',10) != chapter ) break blockLoop; //once we hit the wrong chapter we are done. + } + }else if( content.type == "start_milestone" && content.subtype == "usfm:zaln" ){ + //remove the old alignment. + contentArray.splice( contentIterator.c, 1 ); + contentIterator.c--; //decrement the index so that next loop we are on the next item. 
+ }else if( content.type == "end_milestone" && content.subtype == "usfm:zaln" ){ + //remove the old alignment. + contentArray.splice( contentIterator.c, 1 ); + contentIterator.c--; //decrement the index so that next loop we are on the next item. + }else if( content.type == "wrapper" && content.subtype == "usfm:w" ){ + //test if this is an empty word. + if( content.content?.join("").length == 0 ){ + //just drop it. + contentArray.splice( contentIterator.c, 1 ); + contentIterator.c--; //decrement the index so that next loop we are on the next item. + }else{ + const relevantAlignment = targetWordHashToAlignment.get( hashWordToString( perfContentToTWord( content, SECONDARY_WORD ) ) ); + + + //see if this is part of the current source map. Otherwise close the current source map. + if( currentSourceAlignmentHash != (hashNgramToString(relevantAlignment?.sourceNgram) ?? "") ){ + closeSourceRange(); + //add in the new alignment. + if( relevantAlignment ){ + for( const sourceToken of relevantAlignment.sourceNgram ){ + const newStartMilestone : any= { + type: "start_milestone", + subtype: "usfm:zaln", + atts: {} + }; + if( ("strong" in sourceToken) ){ newStartMilestone.atts["x-strong" ] = [ "" + sourceToken.strong ]; } + if( ("lemma" in sourceToken) ){ newStartMilestone.atts["x-lemma" ] = [ "" + sourceToken.lemma ]; } + if( ("morph" in sourceToken) ){ newStartMilestone.atts["x-morph" ] = sourceToken.morph.split(","); } + if( ("occurrence" in sourceToken) ){ newStartMilestone.atts["x-occurrence" ] = [ "" + sourceToken.occurrence ]; } + if( ("occurrences" in sourceToken) ){ newStartMilestone.atts["x-occurrences" ] = [ "" + sourceToken.occurrences ]; } + if( ("text" in sourceToken) ){ newStartMilestone.atts["x-content" ] = [ "" + sourceToken.text ]; } + + contentArray.splice( contentIterator.c, 0, newStartMilestone ); + contentIterator.c++; + } + currentSourceAlignmentHash = hashNgramToString(relevantAlignment.sourceNgram); + currentSourceAlignmentLength = 
/**
 * Reads the "sourceFolders" configuration value and normalizes it to a list.
 *
 * Presumably these are the folders holding the source-language (Greek, Hebrew) texts
 * that the target files being edited are aligned against - confirm against the
 * configuration contribution.
 *
 * @param getConfiguration - Looks a configuration value up by key; it is called once,
 *   with the key "sourceFolders".
 * @returns The configured folders. A missing (`undefined` or `null`) value yields an
 *   empty array, and a single string is wrapped in a one-element array.
 */
export async function getSourceFolders( getConfiguration: (key: string) => Promise<any> ) : Promise< string[] >{

    console.log( "requesting sourceFolders." );

    const configured : unknown = await getConfiguration( "sourceFolders" );

    //Nothing configured: fall back to no folders at all.
    if( configured === undefined || configured === null ){ return []; }

    //A lone folder may be stored as a bare string instead of a list.
    if( typeof configured === 'string' ){ return [configured]; }

    return configured as string[];
}
/**
 * Returns the text of one verse, either as a plain string or character by character
 * with the position each character came from.
 *
 * Paragraph blocks of the main sequence are walked while chapter and verse marks are
 * tracked. Inside the requested verse, strings and `usfm:w` words contribute their
 * text; other entries contribute nothing. A verse piece that sits at the very start of
 * a block is preceded by a supplemented space. The walk ends at the first non-mark
 * entry behind the verse.
 *
 * In attributed form every character records its block index, content index and its
 * offset within the entry. A word is bracketed by the meta characters "<" (offset -1)
 * and ">" (offset = word length). The supplemented space has offset -1 and
 * `supplemented: true`.
 *
 * @param perf - The PERF document to read from.
 * @param reference - Chapter and verse to extract.
 * @param includeAttributes - `true` for the attributed form, `false` for a plain string.
 * @param startIndex - Optional block/content position where the verse starts. When
 *   given, the walk begins there instead of at the top of the document and the position
 *   is trusted to belong to `reference`.
 * @returns The attributed characters or the plain string; empty when the verse is absent.
 */
export function getAttributedVerseCharactersFromPerf( perf: Perf, reference: TReference, includeAttributes: boolean = true, startIndex: TBlockContentIndex | undefined = undefined ): TAttributedString | string {
    const attributed : TAttributedString = [];
    let plain = "";
    const finish = () : TAttributedString | string => includeAttributes ? attributed : plain;

    const blocks = perf?.sequences?.[perf?.main_sequence_id ?? ""]?.blocks ?? [];

    //If the location of the verse is supplied we don't have to hunt for it: start at
    //that position, inside the right block, with the reference already established.
    const hasStart = startIndex != undefined;
    const firstBlockIndex = Math.max( startIndex?.b ?? 0, 0 );
    const firstContentIndex = Math.max( startIndex?.c ?? 0, 0 );

    let currentChapter = hasStart ? reference.chapter : -1;
    let currentVerse = hasStart ? reference.verse : -1;
    let foundVerse = hasStart;

    for( let blockIndex = firstBlockIndex; blockIndex < blocks.length; blockIndex++ ){
        const block = blocks[blockIndex];
        if( block === undefined || block.type != 'paragraph' ) continue;

        const contents = block.content ?? [];
        //Only the first block is entered part way through.
        const initialContentIndex = (blockIndex == firstBlockIndex) ? firstContentIndex : 0;

        for( let contentIndex = initialContentIndex; contentIndex < contents.length; contentIndex++ ){
            const content = contents[contentIndex];
            if( content === undefined ) continue;

            if( typeof( content ) == 'object' && content.type == 'mark' ){
                if( content.subtype == 'chapter' ){
                    currentChapter = parseInt(content?.atts?.number || "-1",10);
                }else if( content.subtype == 'verses' ){
                    currentVerse = parseInt(content?.atts?.number || "-1",10);
                }
                continue;
            }

            if( currentChapter != reference.chapter || currentVerse != reference.verse ){
                //The first real content behind the verse ends the walk.
                if( foundVerse ) return finish();
                continue;
            }

            //A piece at the very start of a block gets a space in front of it. The
            //charIndex of -1 marks it as not being part of the entry itself.
            if( contentIndex == 0 ){
                if( includeAttributes ){
                    attributed.push( {
                        char: " ",
                        blockIndex,
                        contentIndex,
                        charIndex: -1,
                        isMeta: false,
                        supplemented: true,
                    });
                }else{
                    plain += " ";
                }
            }

            foundVerse = true;

            let currentWord : string | undefined = undefined;
            let isWrappedWord = false;
            if( typeof content == 'string' ){
                currentWord = content;
            }else if( content.type == "wrapper" && content.subtype == "usfm:w" ){
                currentWord = content.content?.join( " " ) ?? "";
                isWrappedWord = true;
            }
            if( currentWord === undefined ) continue;

            if( !includeAttributes ){
                plain += currentWord;
                continue;
            }

            if( isWrappedWord ){
                attributed.push( {
                    char: "<",
                    blockIndex,
                    contentIndex,
                    charIndex: -1,
                    isMeta: true, });
            }
            for( const [charIndex, char] of currentWord.split("").entries() ){
                attributed.push( {
                    char,
                    blockIndex,
                    contentIndex,
                    charIndex,
                    isMeta: false,
                });
            }
            if( isWrappedWord ){
                attributed.push( {
                    char: ">",
                    blockIndex,
                    contentIndex,
                    charIndex: currentWord.length,
                    isMeta: true, });
            }
        }
    }

    return finish();
}