From ce505b04b537c6dc6cbc680a2087feb55438393a Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:42:22 +0100 Subject: [PATCH 01/20] server: pod: remove server-side HTML escaping As of this commit, the language server protocol doesn't support HTML as a markup content type and it seems that there are currently no plans to support it. [1] It's not really defined whether LSP servers should escape HTML or return HTML escape sequences for things like hover documentation; at least I wasn't able to find any clear specification on that. However, there is a note on the `MarkupContent` interface [2] that says that *clients* might sanitize the returned markup, in order to e.g. remove any HTML that might be in there. This goes for both `plaintext` and `markdown` markup kinds. It's therefore best to let clients decide whether they should escape any markdown content returned by the server or not. Because there are several servers that do escape HTML on the server side, the Neovim LSP client specifically handles common HTML escape sequences and converts them back to their un-escaped counterparts. [3] I think it's therefore safe to say that HTML shouldn't be escaped server-side and that clients should decide whether they want to sanitize their strings or not. [1]: https://github.com/microsoft/language-server-protocol/issues/781 [2]: https://github.com/microsoft/vscode-languageserver-node/blob/14ddabfc22187b698e83ecde072247aa40727308/types/src/main.ts#L2048-L2049 [3]: https://github.com/neovim/neovim/blob/3a61f05dd2d6cb2ac9bca4795467f459595e58dd/runtime/lua/vim/lsp/util.lua#L1265-L1276 Signed-off-by: Max R. 
Carrara --- server/src/pod.ts | 45 --------------------------------------------- 1 file changed, 45 deletions(-) diff --git a/server/src/pod.ts b/server/src/pod.ts index e9be6ab..9bee008 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -435,54 +435,9 @@ const processInlineElements = (line: string): string => { // Handle index entries (X), ignoring as Markdown doesn't have an index line = line.replace(/X<([^<>]+)>/g, ""); - // Escape HTML entities last since we use them above - line = escapeHTML(line); - return line; }; - -function escapeRegExp(str: string): string { - return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string - } - - - -const escapeHTML = (str: string): string => { - const map: { [key: string]: string } = { - "&": "&", - "<": "<", - ">": ">", - '"': """, - "'": "'", - "\\\\": "\\", // Two backslashes become one - - // These are required for the regex to consume & to ensure they don't get mapped to amp style. - "\\&": "\\&", - "\\<": "\\<", - '\\"': '\\"', - "\\'": "\\'", - }; - - // If the number of backticks is odd, it means backticks are unbalanced - const backtickCount = (str.match(/`/g) || []).length; - const segments = str.split("`"); - - if (backtickCount % 2 !== 0 || segments.length % 2 === 0) { - // Handle the unbalanced backticks here - str = str.replaceAll("`", ""); - } - - // Escape special characters and create a regex pattern - const pattern = new RegExp( Object.keys(map).map(escapeRegExp).join('|'), 'g' ); - - for (let i = 0; i < segments.length; i += 2) { - segments[i] = segments[i].replace(pattern, (m) => map[m]); - } - - return segments.join("`"); -}; - const escapeBackticks = (str: string): string => { let count = (str.match(new RegExp(tempPlaceholder, 'g')) || []).length; str = str.replace(new RegExp(tempPlaceholder, 'g'), '`'); // Backticks inside don't need to be escaped. From 9233746a3774b03836687b3d7364cd8df6cb6898 Mon Sep 17 00:00:00 2001 From: "Max R. 
Carrara" Date: Thu, 2 Jan 2025 14:42:29 +0100 Subject: [PATCH 02/20] server: pod: improve regexes for inline format codes This commit makes the regexes that handle the conversion of POD inline format codes to markdown formatting more robust and more conformant to the POD specification. Specifically, previously only codes like "B<< foobar >>" were supported, whereas now codes like the following are also supported: B<< foo bar >> B<<<< foo bar >>>> Both of these formatting codes result in the following markdown: **foo bar** However, due to the nature of doing this kind of conversion using regexes, mismatching numbers of brackets are still possible ("allowed"). Moreover, because this conversion is performed line-by-line, formatting codes like the following two examples will not be recognized, even though the POD spec considers them valid: B<<<<< foo bar >>>>> B<< foo bar >> Both the mismatching brackets and the multiline format code parsing could be addressed by writing a separate parser & converter specifically for formatting codes, but this was deemed as having little to no benefit for the amount of work that it requires. Signed-off-by: Max R. Carrara --- server/src/pod.ts | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/server/src/pod.ts b/server/src/pod.ts index 9bee008..00803d4 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -403,8 +403,7 @@ const processInlineElements = (line: string): string => { line = line.replace(/C<((?:[^<>]|[EL]<[^<>]+>)+?)>/g, (match, code) => escapeBackticks(code)); // Unfortunately doesn't require the <<< to be matched in quantity. E<> is allowed automatically - line = line.replace(/C<< (.+?) >>/g, (match, code) => escapeBackticks(code)); - line = line.replace(/C<<<+ (.+?) 
>+>>/g, (match, code) => escapeBackticks(code)); + line = line.replace(/C<<+\s+(.+?)\s+>+>/g, (match, code) => escapeBackticks(code)); // Handle special characters (E) line = line.replace(/E<([^>]+)>/g, (match, entity) => convertE(entity)); @@ -414,17 +413,17 @@ const processInlineElements = (line: string): string => { // Handle bold (B) line = line.replace(/B<([^<>]+)>/g, "**$1**"); - line = line.replace(/B<< (.+?) >>/g, "**$1**"); + line = line.replace(/B<<+\s+(.+?)\s+>+>/g, "**$1**"); // Handle italics (I) line = line.replace(/I<([^<>]+)>/g, "*$1*"); - line = line.replace(/I<< (.+?) >>/g, "*$1*"); + line = line.replace(/I<<+\s+(.+?)\s+>+>/g, "*$1*"); // Handle links (L), URLS auto-link in vscode's markdown line = line.replace(/L<(http[^>]+)>/g, " $1 "); line = line.replace(/L<([^<>]+)>/g, "`$1`"); - line = line.replace(/L<< (.*?) >>/g, "`$1`"); + line = line.replace(/L<<+\s+(.*?)\s+>+>/g, "`$1`"); // Handle non-breaking spaces (S) line = line.replace(/S<([^<>]+)>/g, "$1"); From f25a58bf099ddd79ba5e831684f8a1227eeb0687 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:42:32 +0100 Subject: [PATCH 03/20] server: pod: support bold italic / italic bold formatting codes Signed-off-by: Max R. 
Carrara --- server/src/pod.ts | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/server/src/pod.ts b/server/src/pod.ts index 00803d4..90afc3a 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -140,13 +140,13 @@ async function fsPathOrAlt(fsPath: string | undefined): Promise { // Mapping the Unicode non-character U+FFFF back to escaped backticks line = line.replace(new RegExp(tempPlaceholder, 'g'), '\\`'); + // Handle bold italic (B>) + line = line.replace(/B]+)>>/g, "***$1***"); + line = line.replace(/B>+>/g, "***$1***"); + line = line.replace(/B<<+\s+I<([^<>]+)>\s+>+>/g, "***$1***"); + line = line.replace(/B<<+\s+I<<+\s+(.+?)\s+>+>\s+>+>/g, "***$1***"); + + // Handle italic bold (B>) + line = line.replace(/I]+)>>/g, "***$1***"); + line = line.replace(/I>+>/g, "***$1***"); + line = line.replace(/I<<+\s+B<([^<>]+)>\s+>+>/g, "***$1***"); + line = line.replace(/I<<+\s+B<<+\s+(.+?)\s+>+>\s+>+>/g, "***$1***"); + // Handle bold (B) line = line.replace(/B<([^<>]+)>/g, "**$1**"); line = line.replace(/B<<+\s+(.+?)\s+>+>/g, "**$1**"); From f381f439952941aaec694097c37f8f602c0c6ef0 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:42:35 +0100 Subject: [PATCH 04/20] server: pod: rewrite POD parser and markdown converter This commit completely rewrites the existing POD parsing and markdown conversion logic, separating it into several components and data types. The three main components, the "parser", "processor" and "converter", are all represented as classes in order to keep their corresponding helpers namespaced (and private). While a purely functional approach is also possible here, I found this to be the most straightforward way to keep everything contained / grouped without introducing any additional files and such. These components are as follows: 1. `RawPodParser` The `RawPodParser` class takes the contents of a file, parses any POD document it encounters and returns it as a `RawPodDocument`. 
This kind of POD document is called "raw" because no additional validity checks are performed during the parse (e.g. checking if an `=over` command is eventually matched by a `=back` command). 2. `PodProcessor` This class takes a `RawPodDocument` and transforms it into a `PodDocument`, performing various validity checks and data conversions. In essence, this makes sure that the given document conforms to the POD specification as closely as is feasible (without overcomplicating all of this). Additionally, certain kinds of paragraphs (e.g. verbatim paragraphs) are merged, in order to make them easier to use when converting the POD doc to Markdown. 3. `PodToMarkdownConverter` As the name implies, this class converts the given `PodDocument` into a string containing Markdown. This conversion is infallible, as every `PodDocument` returned by the POD processor has already been verified to be correct. All of the components above make it possible to return an entire section (denoted by `=head1`, `=head2`, etc.) belonging to the symbol name being looked up when a client requests the symbol's hover documentation. This means that all sub-sections and other POD commands (e.g. lists in `=over ... =back` regions) are now also included in the hover documentation. Additionally, several other POD paragraphs / structures are now supported, and some minor issues are fixed along the way. To summarize: - Consecutive verbatim paragraphs are merged into one large paragraph and rendered as a Markdown code block without syntax highlighting - Nested `=over ... =back` regions with specified indent levels * This also allows nested lists to be rendered neatly - `=begin html ... =end html` regions are converted to Markdown code blocks with HTML syntax highlighting - `=begin code ... =end code` regions are converted to Markdown code blocks with Perl syntax highlighting - Nested `=begin ... 
=end` regions with (custom) format names and parameters (parameters are currently ignored) - `=for` paragraphs are treated the same as `=begin ... =end` regions, as per spec - `=head1`, `=head2`, etc. are now correctly converted to their respective Markdown counterparts (e.g. `=head1 foo` --> `# foo`) - Searching for documentation of methods is now more precise Signed-off-by: Max R. Carrara --- server/src/pod.ts | 2120 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 1787 insertions(+), 333 deletions(-) diff --git a/server/src/pod.ts b/server/src/pod.ts index 90afc3a..2fbb731 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -3,97 +3,1821 @@ import { PerlDocument, PerlElem, PerlSymbolKind } from "./types"; import Uri from "vscode-uri"; import { isFile } from "./utils"; -export async function getPod(elem: PerlElem, perlDoc: PerlDocument, modMap: Map): Promise { - // File may not exists. Return nothing if it doesn't +// Error types - const absolutePath = await resolvePathForDoc(elem, perlDoc, modMap); +export type PodParseError = RawPodParseError | PodProcessingError; + +export interface RawPodParseError { + kind: "parseerror"; + message: string; + lineNo: number; +} + +export interface PodProcessingError { + kind: "processingerror"; + message: string; +} + +/** A paragraph whose first line matches `^[ \t]`. + * + * May also be *inside* `=begin [formatname]` and `=end [formatname]` commands, + * as long as [formatname] starts with a colon `:`. + */ +export interface VerbatimParagraph { + kind: "verbatim"; + lineNo?: number; + lines: Array; +} + +/** Not a CommandParagraph and not a VerbatimParagraph. Basically just + * arbitrary text. + * + * May also be *inside* `=begin [formatname]` and `=end [formatname]` commands, + * as long as [formatname] starts with a colon `:`. 
+ */ +export interface OrdinaryParagraph { + kind: "ordinary"; + lineNo?: number; + lines: Array; +} + +/** Contents *inside* `=begin [formatname] [parameter]` and `=end [formatname]` + * commands, as long as [formatname] does *not* start with a colon `:`. + */ +export interface DataParagraph { + kind: "data"; + lineNo?: number; + lines: Array; +} + +// Concrete command paragraphs +// +// Note: `=pod` and `=cut` aren't typed here, as they're already represented +// by a `PodBlock`. + +export const enum HeaderLevel { + One = 1, + Two, + Three, + Four, + Five, + Six, +} + +/** Represents `=head1` until `=head6`. + */ +export interface HeaderParagraph { + kind: "head"; + lineNo?: number; + level: HeaderLevel; + contents: string; +} + +/** Represents `=over`. + */ +export interface OverParagraph { + kind: "over"; + lineNo?: number; + level: number; // non-zero and `4` by default +} + +/** Represents `=back`. + */ +export interface BackParagraph { + kind: "back"; + lineNo?: number; +} + +/** Represents `=item *` or a plain `=item`. + * May be followed by text. + */ +export interface UnordererdItemParagraph { + kind: "unordereditem"; + lineNo?: number; + lines?: Array; +} + +/** `=item N` or `=item N.` where `N` is any whole number. + * May be followed by text. + */ +export interface OrderedItemParagraph { + kind: "ordereditem"; + num: number; + lineNo?: number; + lines?: Array; +} + +/** Represents `=encoding [encodingname]` - currently parsed, but unused. + */ +export interface EncodingParagraph { + kind: "encoding"; + lineNo?: number; + name: string; +} + +/** Represents `=begin [formatname] [parameter]`. + */ +export interface BeginParagraph { + kind: "begin"; + lineNo?: number; + formatname: string; + parameter: string; +} + +/** Represents `=end [formatname]`. + */ +export interface EndParagraph { + kind: "end"; + lineNo?: number; + formatname: string; +} + +/** Represents `=for [formatname] [contents]`. 
+ * If `formatname` begins with a colon `:`, `contents` will be interpreted + * as an ordinary paragraph. + * + * If it doesn't begin with a colon, `contents` will be interpreted as a data + * paragraph. + */ +export interface ForParagraph { + kind: "for"; + lineNo?: number; + formatname: string; + lines: Array; +} + +/** Yielded if none of the other command paragraphs match. + */ +export interface UnknownCommandParagraph { + kind: "unknown"; + lineNo?: number; + cmd: string; + contents: string; +} + +export type CommandParagraph = HeaderParagraph + | OverParagraph + | BackParagraph + | UnordererdItemParagraph + | OrderedItemParagraph + | EncodingParagraph + | BeginParagraph + | EndParagraph + | ForParagraph + | UnknownCommandParagraph; + +export type PodParagraph = CommandParagraph + | VerbatimParagraph + | OrdinaryParagraph + | DataParagraph; + +/** Represents the "raw" raw paragraphs between `=pod ... =cut` commands. + * "Raw" here means that all kinds of paragraphs can appear anywhere and in any + * order -- no checks (beyond parsing the paragraphs correctly) are performed. + * + * During the parser's second pass, the paragraphs in this block are then + * checked for their validity, e.g. whether `=over` is followed by a `=back` + * and so on, before processing the block into a `PodBlock`. + * + * Repeated occurrences of `=pod` and `=cut` commands are ignored when this + * block is being constructed. + */ +export interface RawPodBlock { + kind: "rawpodblock"; + lineNo?: number; + paragraphs: Array; +} + +export type PodBlockContent = VerbatimParagraph + | OrdinaryParagraph + | HeaderParagraph + | UnordererdItemParagraph + | OrderedItemParagraph + | EncodingParagraph + | UnknownCommandParagraph + | OverBlock + | DataBlock + | NormalDataBlock; + +/** Represents a list of paragraphs and other blocks between `=pod ... =cut` commands. + * + * This kind of block is created by processing a `RawPodBlock` during the parser's + * second pass. 
+ */ +export interface PodBlock { + kind: "podblock"; + lineNo?: number; + paragraphs: Array; +} + +export type OverBlockContent = VerbatimParagraph + | OrdinaryParagraph + | UnordererdItemParagraph + | OrderedItemParagraph + | EncodingParagraph + | UnknownCommandParagraph + | OverBlock + | DataBlock + | NormalDataBlock; + +/** Represents an `=over` ... `=back` block. + * - Cannot be empty + * - Cannot contain headers (HeaderParagraphs) + */ +export interface OverBlock { + kind: "overblock"; + lineNo?: number; + level: number; // non-zero and `4` by default + paragraphs: Array; +} + +export type DataBlockContent = DataParagraph + | DataBlock + | NormalDataBlock; + +/** Represents a `=begin [formatname] [parameter]` ... `=end [formatname]` block. + * `formatname` must not begin with a colon `:`. + * + * This may also represents a `=for [formatname] text...` command. + * + * Other command paragraphs may *not* appear inside this type of block. + * Verbatim and ordinary paragraphs become data paragraphs. + */ +export interface DataBlock { + kind: "datablock"; + lineNo?: number; + formatname: string; + parameter: string; + paragraphs: Array; +} + +/** Like a `DataBlock`, but `formatname` begins with a colon `:`. + * This means that the contents inside the `=begin ... =end` block are subject + * to normal processing. + */ +export interface NormalDataBlock { + kind: "normaldatablock"; + lineNo?: number; + formatname: string; + parameter: string; + paragraphs: Array; +} + +/** Represents a POD document which hasn't yet been processed further. + */ +export interface RawPodDocument { + kind: "rawpoddocument", + blocks: Array; +} + +/** A completely parsed and processed POD document. + */ +export interface PodDocument { + kind: "poddocument", + blocks: Array; +} + +/** Tracks the state for parsing POD content from a file. + * See {@link parse} for more information. 
+ */
+export class RawPodParser {
+    #lineIter: Generator<string> = this.#makeLineIter([]);
+    #currentLineNo: number = 0;
+    #currentBlock?: RawPodBlock = undefined;
+    #parsedBlocks: Array<RawPodBlock> = [];
+
+    /** Parses and returns POD content from the given file contents.
+     * Note that this returns a {@link RawPodDocument} on success, which contains
+     * POD content that hasn't been processed and checked for validity yet.
+     * This is done via the {@link PodProcessor}.
+     *
+     * @param fileContents - Full text of the file; split on `\n` / `\r\n`.
+     * @returns The raw document, or a {@link RawPodParseError} if a command
+     * paragraph could not be parsed or a POD region is still open at the end
+     * of the input.
+     */
+    parse(fileContents: string): RawPodDocument | RawPodParseError {
+        const lines = fileContents.split(/\r?\n/);
+
+        // Reset state
+        this.#lineIter = this.#makeLineIter(lines);
+        this.#currentLineNo = 0;
+        this.#currentBlock = undefined;
+        this.#parsedBlocks = [];
+
+        while (true) {
+            const line = this.#getNextLine();
+
+            // EOF
+            if (line === undefined) {
+                break;
+            }
+
+            // line is empty
+            if (line === "") {
+                continue;
+            }
+
+            if (/^=[a-zA-Z]/.test(line)) {
+                // `=cut` only counts when it isn't followed by another letter,
+                // so that e.g. `=cutoff` is not mistaken for the end of the block.
+                if (/^=cut(?![a-zA-Z])/.test(line)) {
+                    if (this.#currentBlock !== undefined) {
+                        this.#parsedBlocks.push(this.#currentBlock);
+                        this.#currentBlock = undefined;
+                    }
+
+                    // ignoring repeated `=cut`s here, because they don't really matter
+
+                    this.#skipUntilEmptyLine();
+                    continue;
+                }
+
+                // Any command paragraph (not just `=pod`) opens a POD block.
+                if (this.#currentBlock === undefined) {
+                    this.#currentBlock = { kind: "rawpodblock", lineNo: this.#currentLineNo, paragraphs: [] };
+                }
+
+                if (/^=pod(?![a-zA-Z])/.test(line)) {
+                    this.#skipUntilEmptyLine();
+                    continue;
+                }
+
+                // other command paragraphs
+                const paraResult = this.#tryParseCommand(line);
+
+                if (paraResult.kind === "parseerror") {
+                    return paraResult;
+                }
+
+                // no need to skip to an empty line here, as that is handled for
+                // each paragraph in tryParseCommand
+
+                this.#currentBlock.paragraphs.push(paraResult);
+                continue;
+            }
+
+            // Everything outside of a POD block is Perl code; ignore it.
+            if (this.#currentBlock === undefined) {
+                continue;
+            }
+
+            if (/^[ \t]/.test(line)) {
+                this.#currentBlock.paragraphs.push(this.#parseVerbatim(line));
+                continue;
+            }
+
+            this.#currentBlock.paragraphs.push(this.#parseOrdinary(line));
+        }
+
+        // NOTE(review): a POD block that runs to end-of-file without `=cut` is
+        // reported as an error here; perl itself appears to accept that --
+        // confirm whether this should close the block instead.
+        if (this.#currentBlock !== undefined) {
+            const lineNo = this.#currentBlock.lineNo as number;
+            return {
+                kind: "parseerror",
+                lineNo: lineNo,
+                message: `"=pod ... =cut" region beginning at line ${lineNo} was never closed (missing "=cut")`
+            };
+        }
+
+        return {
+            kind: "rawpoddocument",
+            blocks: this.#parsedBlocks,
+        };
+    }
+
+    /** Wraps the given lines in a generator, so they can be consumed one by one. */
+    *#makeLineIter(lines: string[]) {
+        yield* lines;
+    }
+
+    /** Returns the next line and advances the line counter, or `undefined`
+     * once all lines have been consumed.
+     */
+    #getNextLine(): string | undefined {
+        const { value, done } = this.#lineIter.next();
+
+        if (done || value === undefined) {
+            return;
+        }
+
+        this.#currentLineNo++;
+
+        return value;
+    }
+
+    /** Consumes lines up to and including the next empty line (or EOF). */
+    #skipUntilEmptyLine(): void {
+        while (this.#getNextLine()) {
+            // discard
+        }
+    }
+
+    /** Consumes lines up to and including the next empty line (or EOF) and
+     * joins them onto `content`, separated by single spaces.
+     *
+     * No separator is inserted while `content` is still empty, so that text
+     * starting on the line after the command doesn't get a leading space.
+     *
+     * @param content - Text already read from the command line; may be empty.
+     * @param trimOpts - Whether to trim each appended line at its start / end.
+     * @returns The joined text.
+     */
+    #appendNextLineUntilEmptyLine(
+        content: string,
+        trimOpts: { trimStart?: boolean, trimEnd?: boolean } = {}
+    ): string {
+        let line: string | undefined;
+
+        while (line = this.#getNextLine()) {
+            if (trimOpts.trimStart && trimOpts.trimEnd) {
+                line = line.trim();
+            } else if (trimOpts.trimStart) {
+                line = line.trimStart();
+            } else if (trimOpts.trimEnd) {
+                line = line.trimEnd();
+            }
+
+            content = content === "" ? line : content + " " + line;
+        }
+
+        return content;
+    }
+
+
+    /** Maps the digit matched after `=head` to its {@link HeaderLevel}.
+     * Returns `undefined` if the string isn't a number between 1 and 6.
+     */
+    static #parsedLevelToHeaderLevel(matchedLevel: string): HeaderLevel | undefined {
+        const level = parseInt(matchedLevel, 10);
+
+        if (isNaN(level)) {
+            return;
+        }
+
+        // Index 0 is a placeholder, as there is no `=head0`.
+        const levels = [
+            undefined,
+            HeaderLevel.One,
+            HeaderLevel.Two,
+            HeaderLevel.Three,
+            HeaderLevel.Four,
+            HeaderLevel.Five,
+            HeaderLevel.Six,
+        ] as const;
+
+        return levels[level];
+    }
+
+    /** Tries to parse a command paragraph.
+     * The passed `line` is expected to have matched `/^=[a-zA-Z]/` beforehand.
+     */
+    #tryParseCommand(line: string): PodParagraph | RawPodParseError {
+        line = line.trimEnd();
+        const lineNo = this.#currentLineNo;
+
+        // Reads the rest of an `=item *` / `=item N.` paragraph and joins it
+        // with the text found on the command line itself. The joined result
+        // must be used: the helper returns a new string rather than modifying
+        // its argument, so ignoring it would drop all continuation lines.
+        const readItemText = (firstLine: string | undefined): Array<string> | undefined => {
+            const joined = this.#appendNextLineUntilEmptyLine(
+                firstLine ?? "", { trimStart: true, trimEnd: true }
+            ).trim();
+
+            return joined !== "" ? [joined] : undefined;
+        };
+
+        let matchResult;
+
+        // =head[1-6]
+        matchResult = [...line.matchAll(/^=head(?<level>[1-6])(\s+(?<contents>.*))?/g)][0];
+        if (matchResult !== undefined) {
+            // Casts here are fine, because we only match expected level in regex
+            const matchedLevel = matchResult.groups?.level as string;
+            const level = RawPodParser.#parsedLevelToHeaderLevel(matchedLevel) as HeaderLevel;
+
+            // The heading text may continue on (or only start on) the
+            // following lines; trim so that it never has a leading space.
+            const contents = this.#appendNextLineUntilEmptyLine(
+                matchResult.groups?.contents || "", { trimStart: true, trimEnd: true }
+            ).trim();
+
+            const para: HeaderParagraph = {
+                kind: "head",
+                lineNo: lineNo,
+                contents: contents,
+                level: level,
+            };
+
+            return para;
+        }
+
+        // =item
+        // =item\s+*
+        // =item\s+\d+\.?
+        // =item\s+[text...]
+        matchResult = [...line.matchAll(/^=item(\s+((?<asterisk>\*)\s*|((?<num>\d+)\.?\s*))?)?(?<text>.*)?/g)][0];
+        if (matchResult !== undefined) {
+            const { asterisk, num, text } = matchResult.groups ?? {};
+
+            // =item *
+            if (asterisk) {
+                const para: UnordererdItemParagraph = {
+                    kind: "unordereditem",
+                    lineNo: lineNo,
+                };
+
+                const itemLines = readItemText(text);
+                if (itemLines !== undefined) {
+                    para.lines = itemLines;
+                }
+
+                return para;
+            }
+
+            // =item N.
+            if (num) {
+                const para: OrderedItemParagraph = {
+                    kind: "ordereditem",
+                    num: parseInt(num, 10),
+                    lineNo: lineNo,
+                };
+
+                const itemLines = readItemText(text);
+                if (itemLines !== undefined) {
+                    para.lines = itemLines;
+                }
+
+                return para;
+            }
+
+            // =item Lorem ipsum dolor ...
+            if (text) {
+                const lines: Array<string> = [];
+                let currentLine: string | undefined = text;
+
+                // breaks if undefined or empty line
+                while (currentLine) {
+                    lines.push(currentLine.trim());
+
+                    currentLine = this.#getNextLine();
+                }
+
+                const para: UnordererdItemParagraph = {
+                    kind: "unordereditem",
+                    lineNo: lineNo,
+                    lines: lines,
+                };
+
+                return para;
+            }
+
+            // =item
+            const para: UnordererdItemParagraph = {
+                kind: "unordereditem",
+                lineNo: lineNo,
+            };
+
+            this.#skipUntilEmptyLine();
+
+            return para;
+        }
+
+        // =encoding
+        matchResult = [...line.matchAll(/^=encoding\s+(?<name>\S+)/g)][0];
+        if (matchResult !== undefined) {
+            const name = matchResult.groups?.name || "";
+
+            this.#skipUntilEmptyLine();
+
+            const para: EncodingParagraph = {
+                kind: "encoding",
+                lineNo: lineNo,
+                name: name,
+            };
+
+            return para;
+        }
+
+        // =over
+        matchResult = [...line.matchAll(/^=over(\s+(?<num>\d+(\.\d*)?))?/g)][0];
+        if (matchResult !== undefined) {
+            const matchedLevel = matchResult.groups?.num;
+            const defaultOverLevel = 4;
+
+            // A missing or zero indent level falls back to the default.
+            let level = matchedLevel !== undefined ? parseFloat(matchedLevel) : 0;
+            level = level > 0 ? level : defaultOverLevel;
+
+            this.#skipUntilEmptyLine();
+
+            const para: OverParagraph = {
+                kind: "over",
+                lineNo: lineNo,
+                level: level,
+            };
+
+            return para;
+        }
+
+        // =back
+        if (line.startsWith("=back")) {
+            this.#skipUntilEmptyLine();
+
+            const para: BackParagraph = {
+                kind: "back",
+                lineNo: lineNo,
+            };
+
+            return para;
+        }
+
+        // =begin
+        matchResult = [
+            ...line.matchAll(
+                /^=begin(\s+(?<formatname>:?[-a-zA-Z0-9_]+)(\s+(?<parameter>.*))?)?/g
+            )
+        ][0];
+        if (matchResult !== undefined) {
+            const formatname = matchResult.groups?.formatname;
+
+            if (formatname === undefined) {
+                return {
+                    kind: "parseerror",
+                    lineNo: lineNo,
+                    message: `"=begin" command at line ${lineNo} does not contain any format name`,
+                };
+            }
+
+            const parameter = this.#appendNextLineUntilEmptyLine(
+                matchResult.groups?.parameter || ""
+            ).trim();
+
+            const para: BeginParagraph = {
+                kind: "begin",
+                lineNo: lineNo,
+                formatname: formatname.trim(),
+                parameter: parameter,
+            };
+
+            return para;
+        }
+
+        // =end
+        matchResult = [...line.matchAll(/^=end(\s+(?<formatname>:?[-a-zA-Z0-9_]+))?/g)][0];
+        if (matchResult !== undefined) {
+            const formatname = matchResult.groups?.formatname;
+
+            if (formatname === undefined) {
+                return {
+                    kind: "parseerror",
+                    lineNo: lineNo,
+                    message: `"=end" command at line ${lineNo} does not contain any format name`,
+                };
+            }
+
+            this.#skipUntilEmptyLine();
+
+            const para: EndParagraph = {
+                kind: "end",
+                lineNo: lineNo,
+                formatname: formatname.trim(),
+            };
+
+            return para;
+        }
+
+        // =for
+        matchResult = [
+            ...line.matchAll(/^=for(\s+(?<formatname>:?[-a-zA-Z0-9_]+)(\s+(?<contents>.*))?)?/g)
+        ][0];
+        if (matchResult !== undefined) {
+            const formatname = matchResult.groups?.formatname;
+
+            if (formatname === undefined) {
+                return {
+                    kind: "parseerror",
+                    lineNo: lineNo,
+                    message: `"=for" command at line ${lineNo} does not contain any format name`,
+                };
+            }
+
+            // The text may start on the command line or on the line after it.
+            // In the latter case the `contents` group is absent, so it has to
+            // be accessed with `?.` to avoid calling `.trim()` on `undefined`.
+            const contents = matchResult.groups?.contents?.trim() ?? "";
+            const lines: Array<string> = [];
+
+            if (contents !== "") {
+                lines.push(contents);
+            }
+
+            // similar to parsing an ordinary or verbatim paragraph;
+            // breaks if undefined or empty line
+            let currentLine = this.#getNextLine();
+            while (currentLine) {
+                lines.push(currentLine.trimEnd());
+
+                currentLine = this.#getNextLine();
+            }
+
+            const para: ForParagraph = {
+                kind: "for",
+                lineNo: lineNo,
+                formatname: formatname.trim(),
+                lines: lines,
+            };
+
+            return para;
+        }
+
+        // unknown command paragraph; just parse it so we can toss it later
+        matchResult = [...line.matchAll(/^=(?<cmd>\S+)(\s+(?<contents>.*))?/g)][0];
+        if (matchResult !== undefined) {
+            const contents = this.#appendNextLineUntilEmptyLine(
+                matchResult.groups?.contents || ""
+            );
+
+            const para: UnknownCommandParagraph = {
+                kind: "unknown",
+                lineNo: lineNo,
+                cmd: matchResult.groups?.cmd as string,
+                contents: contents,
+            };
+
+            return para;
+        }
+
+        // Only reachable if `line` didn't start with `=` followed by a
+        // non-whitespace character, i.e. the precondition above was violated.
+        return {
+            kind: "parseerror",
+            lineNo: lineNo,
+            message: `failed to parse command from line ${lineNo}: "${line}" is not recognized as command paragraph`,
+        };
+    }
+
+    /** Parses a verbatim paragraph.
+     * The passed `line` is expected to have matched `/^[ \t]/` beforehand.
+     *
+     * Consumes lines up to and including the next empty line (or EOF).
+     * Trailing whitespace is stripped from each line, leading whitespace is kept.
+     */
+    #parseVerbatim(line: string): VerbatimParagraph {
+        let currentLine: string | undefined = line;
+        const lineNo = this.#currentLineNo;
+
+        const lines: Array<string> = [];
+
+        // breaks if undefined or empty line
+        while (currentLine) {
+            lines.push(currentLine.trimEnd());
+
+            currentLine = this.#getNextLine();
+        }
+
+        return {
+            kind: "verbatim",
+            lineNo: lineNo,
+            lines: lines,
+        };
+    }
+
+    /** Parses an ordinary paragraph.
+     * The passed `line` is expected to have matched neither `/^=[a-zA-Z]/` nor
+     * `/^[ \t]/` beforehand.
+     */
+    #parseOrdinary(line: string): OrdinaryParagraph {
+        let currentLine: string | undefined = line;
+        const lineNo = this.#currentLineNo;
+
+        const lines: Array<string> = [];
+
+        // breaks if undefined or empty line
+        while (currentLine) {
+            // Lines are stored as-is; no trimming for ordinary paragraphs.
+            lines.push(currentLine);
+
+            currentLine = this.#getNextLine();
+        }
+
+        return {
+            kind: "ordinary",
+            lineNo: lineNo,
+            lines: lines,
+        };
+    }
+}
+
+/** Processes a {@link RawPodDocument} into a proper {@link PodDocument}.
+ */
+export class PodProcessor {
+    /** Processes a {@link RawPodDocument} into a proper {@link PodDocument}.
+     *
+     * This checks whether the given raw document is valid (conforms as much to
+     * the POD specification as possible) and also merges certain paragraphs for
+     * ease of use.
+     *
+     * @param document - The raw document whose blocks are processed in order.
+     * @returns The processed document, or the first {@link PodProcessingError}
+     * reported for any of its blocks.
+     */
+    process(document: RawPodDocument): PodDocument | PodProcessingError {
+        const blockProcessor = new PodBlockProcessor();
+        const processedBlocks: Array<PodBlock> = [];
+
+        for (const rawBlock of document.blocks) {
+            const processedBlockResult = blockProcessor.process(rawBlock);
+
+            // Stop at the first invalid block; no partial document is returned.
+            if (processedBlockResult.kind === "processingerror") {
+                return processedBlockResult;
+            }
+
+            processedBlocks.push(processedBlockResult);
+        }
+
+        return {
+            kind: "poddocument",
+            blocks: processedBlocks,
+        };
+    }
+}
+
+/** Inner workings of {@link PodProcessor}.
*/ +class PodBlockProcessor { + #paragraphIter: Generator = this.#makeParagraphIter([]); + #podBlock: PodBlock = { kind: "podblock", paragraphs: [] }; + + *#makeParagraphIter(paragraphs: Array) { + yield* paragraphs; + } + + #getNextParagraph(): PodParagraph | undefined { + let { value, done } = this.#paragraphIter.next(); + + if (done || value === undefined) { + return; + } + + return value; + } + + process(block: RawPodBlock): PodBlock | PodProcessingError { + // Reset state + this.#paragraphIter = this.#makeParagraphIter(block.paragraphs); + this.#podBlock = { kind: "podblock", paragraphs: [] }; + + let para: PodParagraph | undefined; + let previousPara: PodParagraph | undefined; + + while (true) { + previousPara = para; + para = this.#getNextParagraph(); + + if (!para) { + break; + } + + switch (para.kind) { + case "verbatim": + const lastPara = this.#podBlock.paragraphs[this.#podBlock.paragraphs.length - 1]; + + // Merge verbatim paragraphs for easier conversion later. + if (lastPara && lastPara.kind === "verbatim") { + let mergedLines = [...lastPara.lines, "", ...para.lines]; + + let mergedVerbatim: VerbatimParagraph = { + kind: "verbatim", + lineNo: lastPara.lineNo, + lines: mergedLines, + }; + + this.#podBlock.paragraphs[this.#podBlock.paragraphs.length - 1] = mergedVerbatim; + break; + } + + this.#podBlock.paragraphs.push(para); + break; + case "ordinary": + case "unordereditem": + case "ordereditem": + case "head": + this.#podBlock.paragraphs.push(para); + break; + case "data": + return { + kind: "processingerror", + message: 'encountered unexpected data paragraph', + }; + case "over": + let overBlockResult = this.#enterOverBlock(para); + + if (overBlockResult.kind === "processingerror") { + return overBlockResult; + } + + this.#podBlock.paragraphs.push(overBlockResult); + break; + case "back": + return { + kind: "processingerror", + message: "'=back' does not have matching '=over'", + }; + case "begin": + let dataBlockResult = this.#enterDataBlock(para); 
+ + if (dataBlockResult.kind === "processingerror") { + return dataBlockResult; + } + + this.#podBlock.paragraphs.push(dataBlockResult); + break; + case "end": + return { + kind: "processingerror", + message: `'=end ${para.formatname}' does not have matching '=begin ${para.formatname}'`, + }; + case "for": + let forDataBlock = this.#buildDataBlockFromForPara(para); + + this.#podBlock.paragraphs.push(forDataBlock); + + break; + case "encoding": // ignored + case "unknown": // ignored + break; + default: + const _exhaustiveCheck: never = para; + return _exhaustiveCheck; + } + } + + return this.#podBlock; + } + + // `level` must be non-zero. + #enterOverBlock(paragraph: OverParagraph): OverBlock | PodProcessingError { + let overBlock: OverBlock = { + kind: "overblock", + lineNo: paragraph.lineNo, + level: paragraph.level, + paragraphs: [], + }; + + let isProcessingBlock = true; + let para: PodParagraph | undefined; + + while (isProcessingBlock) { + para = this.#getNextParagraph(); + + if (para === undefined) { + return { + kind: "processingerror", + message: 'unexpected end of paragraphs while processing "=over ... =back" block', + }; + } + + switch (para.kind) { + case "verbatim": + const lastPara = overBlock.paragraphs[overBlock.paragraphs.length - 1]; + + // Merge verbatim paragraphs for easier conversion later. + if (lastPara && lastPara.kind === "verbatim") { + let mergedLines = [...lastPara.lines, "", ...para.lines]; + + let mergedVerbatim: VerbatimParagraph = { + kind: "verbatim", + lineNo: lastPara.lineNo, + lines: mergedLines, + }; + + overBlock.paragraphs[overBlock.paragraphs.length - 1] = mergedVerbatim; + break; + } + + overBlock.paragraphs.push(para); + break; + case "ordinary": + case "unordereditem": + case "ordereditem": + overBlock.paragraphs.push(para); + break; + case "head": + return { + kind: "processingerror", + message: `encountered invalid paragraph in "=over ... 
=back" block: "=head${para.level} ${para.contents}"` + }; + case "data": + return { + kind: "processingerror", + message: 'encountered unexpected data paragraph in "=over ... =back" block', + }; + case "over": + let nestedOverBlockResult = this.#enterOverBlock(para); + + if (nestedOverBlockResult.kind === "processingerror") { + return nestedOverBlockResult; + } + + overBlock.paragraphs.push(nestedOverBlockResult); + break; + case "back": + isProcessingBlock = false; + break; + case "begin": + let nestedDataBlockResult = this.#enterDataBlock(para); + + if (nestedDataBlockResult.kind === "processingerror") { + return nestedDataBlockResult; + } + + overBlock.paragraphs.push(nestedDataBlockResult); + break; + case "end": + return { + kind: "processingerror", + message: `'=end ${para.formatname}' does not have matching '=begin ${para.formatname}'`, + }; + case "for": + let nestedForDataBlock = this.#buildDataBlockFromForPara(para); + + overBlock.paragraphs.push(nestedForDataBlock); + case "encoding": // ignored + case "unknown": // ignored + break; + default: + const _exhaustiveCheck: never = para; + return _exhaustiveCheck; + } + } + + return overBlock; + } + + #enterDataBlock(paragraph: BeginParagraph): DataBlock | NormalDataBlock | PodProcessingError { + if (paragraph.formatname.startsWith(":")) { + return this.#buildNormalDataBlock(paragraph); + } else { + return this.#buildDataBlock(paragraph); + } + } + + #buildDataBlock(paragraph: BeginParagraph): DataBlock | PodProcessingError { + let dataBlock: DataBlock = { + kind: "datablock", + formatname: paragraph.formatname, + parameter: paragraph.parameter, + paragraphs: [], + }; + + let isProcessingBlock = true; + let para: PodParagraph | undefined; + + while (isProcessingBlock) { + para = this.#getNextParagraph(); + + if (para === undefined) { + return { + kind: "processingerror", + message: `unexpected end of paragraphs while processing "=begin ${dataBlock.formatname} ... 
=end ${dataBlock.formatname}" block`, + }; + } + + switch (para.kind) { + case "ordinary": + case "verbatim": + const lastPara = dataBlock.paragraphs[dataBlock.paragraphs.length - 1]; + + // Ordinary and verbatim paragraphs are merged into the previous data paragraph. + if (lastPara && lastPara.kind === "data") { + let mergedLines = [...lastPara.lines, "", ...para.lines]; + + let mergedData: DataParagraph = { + kind: "data", + lineNo: lastPara.lineNo, + lines: mergedLines, + }; + + dataBlock.paragraphs[dataBlock.paragraphs.length - 1] = mergedData; + break; + } + + let dataPara: DataParagraph = { + kind: "data", + lines: para.lines, + }; + + dataBlock.paragraphs.push(dataPara); + + break; + case "data": + return { + kind: "processingerror", + message: `pre-existing data paragraph in "=begin ${dataBlock.formatname} ... =end ${dataBlock.formatname}" block`, + }; + case "encoding": + case "unordereditem": + case "ordereditem": + case "head": + case "over": + case "back": + case "unknown": + return { + kind: "processingerror", + message: `unexpected command paragraph "${para.kind}" in "=begin ${dataBlock.formatname} ... 
=end ${dataBlock.formatname}" block`, + }; + case "begin": + let nestedDataBlockResult = this.#enterDataBlock(para); + + if (nestedDataBlockResult.kind === "processingerror") { + return nestedDataBlockResult; + } + + dataBlock.paragraphs.push(nestedDataBlockResult); + break; + case "end": + const [beginFmtName, endFmtName] = [dataBlock.formatname.trim(), para.formatname.trim()]; + + if (beginFmtName !== endFmtName) { + return { + kind: "processingerror", + message: `"=end ${endFmtName}" does not match "=begin ${beginFmtName}"`, + }; + } + + isProcessingBlock = false; + break; + case "for": + let nestedForDataBlock = this.#buildDataBlockFromForPara(para); + + dataBlock.paragraphs.push(nestedForDataBlock); + break; + default: + const _exhaustiveCheck: never = para; + return _exhaustiveCheck; + } + } + + return dataBlock; + } + + #buildNormalDataBlock(paragraph: BeginParagraph): NormalDataBlock | PodProcessingError { + let dataBlock: NormalDataBlock = { + kind: "normaldatablock", + formatname: paragraph.formatname, + parameter: paragraph.parameter, + paragraphs: [], + }; + + let isProcessingBlock = true; + let para: PodParagraph | undefined; + + while (isProcessingBlock) { + para = this.#getNextParagraph(); + + if (para === undefined) { + return { + kind: "processingerror", + message: `unexpected end of paragraphs while processing "=begin ${dataBlock.formatname} ... =end ${dataBlock.formatname}" block`, + }; + } + + switch (para.kind) { + case "verbatim": + const lastPara = dataBlock.paragraphs[dataBlock.paragraphs.length - 1]; + + // Merge verbatim paragraphs for easier conversion later. 
+ if (lastPara && lastPara.kind === "verbatim") { + let mergedLines = [...lastPara.lines, "", ...para.lines]; + + let mergedVerbatim: VerbatimParagraph = { + kind: "verbatim", + lineNo: lastPara.lineNo, + lines: mergedLines, + }; + + dataBlock.paragraphs[dataBlock.paragraphs.length - 1] = mergedVerbatim; + break; + } + + dataBlock.paragraphs.push(para); + break; + case "ordinary": + case "unordereditem": + case "ordereditem": + case "head": + dataBlock.paragraphs.push(para); + break; + case "data": + return { + kind: "processingerror", + message: `unexpected data paragraph in "=begin ${dataBlock.formatname} ... =end ${dataBlock.formatname}" block`, + }; + case "over": + let overBlockResult = this.#enterOverBlock(para); + + if (overBlockResult.kind === "processingerror") { + return overBlockResult; + } + + dataBlock.paragraphs.push(overBlockResult); + break; + case "back": + return { + kind: "processingerror", + message: "'=back' does not have matching '=over'", + }; + case "begin": + let dataBlockResult = this.#enterDataBlock(para); + + if (dataBlockResult.kind === "processingerror") { + return dataBlockResult; + } + + dataBlock.paragraphs.push(dataBlockResult); + break; + case "end": + const [beginFmtName, endFmtName] = [dataBlock.formatname.trim(), para.formatname.trim()]; + + if (beginFmtName !== endFmtName) { + return { + kind: "processingerror", + message: `"=end ${endFmtName}" does not match "=begin ${beginFmtName}"`, + }; + } + + isProcessingBlock = false; + break; + case "for": + let nestedForDataBlock = this.#buildDataBlockFromForPara(para); + + dataBlock.paragraphs.push(nestedForDataBlock); + break; + case "encoding": // ignored + case "unknown": // ignored + break; + default: + const _exhaustiveCheck: never = para; + return _exhaustiveCheck; + } + } + + return dataBlock; + } + + #buildDataBlockFromForPara(paragraph: ForParagraph): DataBlock | NormalDataBlock { + if (paragraph.formatname.startsWith(":")) { + let innerPara: OrdinaryParagraph = { + kind: 
"ordinary", + lines: paragraph.lines, + }; + + return { + kind: "normaldatablock", + formatname: paragraph.formatname, + parameter: "", + paragraphs: [innerPara], + }; + } + + let innerPara: DataParagraph = { + kind: "data", + lines: paragraph.lines, + }; + + return { + kind: "datablock", + formatname: paragraph.formatname, + parameter: "", + paragraphs: [innerPara], + }; + } +} + +/** Tracks the state for converting a {@link PodDocument} or {@link PodBlock} + * into Markdown. + */ +export class PodToMarkdownConverter { + #blockContentIter: Generator = this.#makeBlockContentIter([]); + #overBlockIndentLevels: Array = []; + + /** Converts a {@link PodDocument} or {@link PodBlock} to Markdown. */ + convert(pod: PodDocument | PodBlock): string { + let blocks: Array; + + if (pod.kind === "poddocument") { + blocks = pod.blocks; + } else { + blocks = [pod]; + } + + // Reset state + this.#blockContentIter = this.#makeBlockContentIter(blocks); + this.#overBlockIndentLevels = []; + + // Need to wrap getNextBlockContent into closure here, + // otherwise we get an access violation + const markdownLines = this.#convertContentUntilDone( + () => this.#getNextBlockContent() + ); + + let finalLines: Array = []; + + for (const line of markdownLines) { + let processedLine = line; + + if (processedLine.trim() === "") { + processedLine = ""; + } + + finalLines.push(processedLine); + } + + if (finalLines.length === 0) { + return ""; + } + + return finalLines.join("\n").trimEnd() + "\n"; + } + + #convertContentUntilDone( + getNext: () => PodBlockContent | undefined, + ): Array { + let lines: Array = []; + + let content: PodBlockContent | undefined; + let previousContent: PodBlockContent | undefined; + + while (true) { + previousContent = content; + content = getNext(); + + if (!content) { + break; + } + + if (!previousContent) { + lines.push(...this.#convertBlockContent(content, getNext)); + continue; + } + + if (isOverBlockWithItem(content)) { + if (!isItem(previousContent)) { + 
ensureLastLineEmpty(lines); + lines.push(...this.#convertBlockContent(content, getNext)); + continue; + } + + lines.push(...this.#convertBlockContent(content, getNext)); + continue; + } + + // Consecutive list items are rendered without an empty line inbetween. + // Keeps the list visually coherent. + if (!(isItem(content) && isItem(previousContent))) { + ensureLastLineEmpty(lines); + lines.push(...this.#convertBlockContent(content, getNext)); + continue; + } + + lines.push(...this.#convertBlockContent(content, getNext)); + } + + return lines; + } + + *#makeBlockContentIter(blocks: Array) { + for (const block of blocks) { + yield* block.paragraphs; + } + } + + #getNextBlockContent(): PodBlockContent | undefined { + let { value, done } = this.#blockContentIter.next(); + + if (done || value === undefined) { + return; + } + + return value; + } + + #convertBlockContent( + content: PodBlockContent, + getNext: () => PodBlockContent | undefined, + ): Array { + switch (content.kind) { + case "verbatim": + return this.#convertVerbatimPara(content); + case "ordinary": + return this.#convertOrdinaryPara(content); + case "head": + return this.#convertHeaderPara(content); + case "unordereditem": + case "ordereditem": + return this.#convertItemPara(content, getNext); + case "overblock": + return this.#convertOverBlock(content); + case "datablock": + return this.#convertDataBlock(content); + case "normaldatablock": + return this.#convertNormalDataBlock(content); + case "encoding": // ignored + case "unknown": // ignored + return []; + default: + const _exhaustiveCheck: never = content; + return _exhaustiveCheck; + } + } + + #convertVerbatimPara(verbatimPara: VerbatimParagraph): Array { + return [ + "```", + ...verbatimPara.lines.map((line) => tabsToSpaces(line, 8)), + "```", + ]; + } - if(!absolutePath) return; + #convertOrdinaryPara(ordinaryPara: OrdinaryParagraph): Array { + return ordinaryPara.lines + .map((line) => tabsToSpaces(line, 8)) + .map(processInlineElements); + } - 
try { - var fileContent = await fs.promises.readFile(absolutePath, "utf8"); - } catch { - return; + #convertHeaderPara(headerPara: HeaderParagraph): Array { + return [ + "#".repeat(headerPara.level) + " " + processInlineElements(headerPara.contents) + ]; } - // Initialize state variables - let inPodBlock = false; - let inRelevantBlock = true; - let podContent = ""; - let podBuffer = ""; // We "buffer" pod when searching to avoid empty sections - let meaningFullContent = false; - let searchItem; - if([PerlSymbolKind.Package, PerlSymbolKind.Module].includes(elem.type)){ - // Search all. Note I'm not really treating packages different from Modules - } else if([PerlSymbolKind.ImportedSub, PerlSymbolKind.Method, PerlSymbolKind.Inherited, PerlSymbolKind.PathedField, - PerlSymbolKind.LocalMethod, PerlSymbolKind.LocalSub].includes(elem.type)){ - searchItem = elem.name; - searchItem = searchItem.replace(/^[\w:]+::(\w+)$/, "$1"); // Remove package - } else { - return; + #convertItemPara( + itemPara: UnordererdItemParagraph | OrderedItemParagraph, + getNext: () => PodBlockContent | undefined, + ): Array { + let itemBeginning: string; + + if (itemPara.kind === "unordereditem") { + itemBeginning = "-"; + } else { + itemBeginning = `${itemPara.num}.`; + } + + const indentAndFormatList = (arr: Array): Array => { + if (arr.length === 0) { + return arr; + } + + let newArr: Array = []; + + newArr.push(itemBeginning + " " + arr[0]); + const indentLevel = itemBeginning.length + 1; + + for (const line of arr.slice(1)) { + newArr.push(" ".repeat(indentLevel) + line); + } + + return newArr; + }; + + if (itemPara.lines && itemPara.lines.length > 0) { + return indentAndFormatList(itemPara.lines.map(processInlineElements)); + } + + let nextContent = getNext(); + + if (!nextContent) { + return [itemBeginning]; + } + + if (nextContent.kind === "unordereditem" || nextContent.kind === "ordereditem") { + return [ + itemBeginning, + ...this.#convertItemPara(nextContent, getNext), + ]; + } + + 
return indentAndFormatList(this.#convertBlockContent(nextContent, getNext)); + } + + #convertOverBlock(block: OverBlock): Array { + const currentIndentLevel: number = Math.round(block.level); + this.#overBlockIndentLevels.push(currentIndentLevel); + + const indentList = (arr: Array): Array => { + let newArr: Array = []; + + const adjustedIndentLevel = this.#overBlockIndentLevels + .reduce((a, b) => a + b, 0) - currentIndentLevel; + + if (adjustedIndentLevel === 0) { + return arr; + } + + for (const line of arr) { + newArr.push(" ".repeat(adjustedIndentLevel) + line); + } + + return newArr; + } + + const overBlockIter = function* (): Generator { + yield* block.paragraphs; + }; + + const iter = overBlockIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + let lines: Array = this.#convertContentUntilDone(getNext); + + if (lines[0]?.trim() === "") { + lines.shift(); + } + + if (lines[lines.length - 1]?.trim() === "") { + lines.pop(); + } + + let result = indentList(lines); + this.#overBlockIndentLevels.pop(); + return result; + } + + #convertDataBlock(block: DataBlock): Array { + const dataBlockIter = function* (): Generator { + yield* block.paragraphs; + }; + + const iter = dataBlockIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + let dataStart: string; + let dataEnd: string; + + const formatname = block.formatname.trim(); + switch (formatname) { + case "code": + dataStart = "```perl"; + dataEnd = "```"; + break; + case "html": + dataStart = "```html"; + dataEnd = "```"; + break; + case "text": + dataStart = ""; + dataEnd = ""; + break; + default: + dataStart = ``; + dataEnd = `` + } + + let lines: Array = []; + let dataBlockPara: DataBlockContent | undefined; + + lines.push(dataStart); + + while (dataBlockPara = getNext()) { + switch (dataBlockPara.kind) { + case 
"data": + lines.push(...dataBlockPara.lines); + break; + case "datablock": + lines.push(dataEnd); + lines.push(...this.#convertDataBlock(dataBlockPara)); + lines.push(dataStart); + break; + case "normaldatablock": + lines.push(dataEnd); + lines.push(...this.#convertNormalDataBlock(dataBlockPara)); + lines.push(dataStart); + break; + default: + const _exhaustiveCheck: never = dataBlockPara; + return _exhaustiveCheck; + } + } + + lines.push(dataEnd); + + return lines; + } + + #convertNormalDataBlock(block: NormalDataBlock): Array { + const normalDataBlockIter = function* (): Generator { + yield* block.paragraphs; + }; + + const iter = normalDataBlockIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + return this.#convertContentUntilDone(getNext); + } +} + +/** Appends an empty line if the last element in the list isn't an empty line already. */ +function ensureLastLineEmpty(list: Array) { + if (list.at(-1)?.trim() !== "") { + list.push(""); + } +} + +function isItem(content: PodBlockContent): boolean { + return ["unordereditem", "ordereditem"].includes(content.kind); +} + +function isOverBlockWithItem(content: PodBlockContent): boolean { + if (content.kind === "overblock") { + const firstBlockContent = content.paragraphs.at(0); + if (firstBlockContent && isItem(firstBlockContent)) { + return true; + } } - let markdown = ""; + return false; +} + +function tabsToSpaces(line: string, spacesPerTab: number = 4): string { + return line.replaceAll("\t", " ".repeat(spacesPerTab)); +} - // Quick search for leading comments of a very specific form with comment blocks the preceed a sub (and aren't simply get/set without docs) - // These regexes are painful, but I didn't want to mix this with the line-by-line POD parsing which would overcomplicate that piece +/** Quick search for leading comments of a very specific form with comment + * blocks that preceed a sub (and aren't 
simply get/set without docs). + * + * Separate function in order to avoid overcomplicating the line-by-line POD parsing. + */ +function quickSearchByComment(symbolName: string, fileContent: string): string | undefined { let match, match2; - if(searchItem && (match = fileContent.match(`\\r?\\n#(?:####+| \-+) *(?:\\r?\\n# *)*${searchItem}\\r?\\n((?:(?:#.*| *)\\r?\\n)+)sub +${searchItem}\\b`))){ + + let markdown: string | undefined; + + if (match = fileContent.match(`\\r?\\n#(?:####+| \-+) *(?:\\r?\\n# *)*${symbolName}\\r?\\n((?:(?:#.*| *)\\r?\\n)+)sub +${symbolName}\\b`)) { // Ensure it's not an empty get/set pair. - if(!( (match2 = searchItem.match(/^get_(\w+)$/)) && match[1].match(new RegExp(`^(?:# +set_${match2[1]}\\r?\\n)?[\\s#]*$`)))){ + if ( + !( + (match2 = symbolName.match(/^get_(\w+)$/)) + && match[1].match(new RegExp(`^(?:# +set_${match2[1]}\\r?\\n)?[\\s#]*$`)) + ) + ) { let content = match[1].replace(/^ *#+ ?/gm,''); content = content.replace(/^\s+|\s+$/g,''); - if(content){ // It may still be empty for non-get functions - markdown += "```text\n" + content + "\n```\n" + + // May still be empty for non-get functions + if (content) { + markdown = "```text\n" + content + "\n```\n"; } } } - // Split the file into lines and iterate through them - const lines = fileContent.split(/\r?\n/); - for (const line of lines) { - if (line.startsWith("=cut")) { - // =cut lines are not added. 
- inPodBlock = false; - } + return markdown; +} - if (line.match(/^=(pod|head\d|over|item|back|begin|end|for|encoding)/)) { - inPodBlock = true; - meaningFullContent = false; - if(searchItem && line.match(new RegExp(`^=(head\\d|item).*\\b${searchItem}\\b`))){ - // This is structured so if we hit two relevant block in a row, we keep them both - inRelevantBlock = true; - } else { - inRelevantBlock = false; - podBuffer = ""; +function lookupSymbolInPod(symbolName: string, podDoc: PodDocument): PodDocument | undefined { + const podDocIter = function* ( + doc: PodDocument + ): Generator { + for (const block of doc.blocks) { + for (const content of block.paragraphs) { + yield content; } - } else if(line.match(/\w/)){ - // For this section, we found something that's not a header and has content - meaningFullContent = true; } + } - if(inPodBlock){ - if(searchItem){ - if(inRelevantBlock) { - podBuffer += line + "\n"; - } - } - else { - podContent += line + "\n"; + const iter = podDocIter(podDoc); + const getNextContent = () => { + const { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + let currentContent: PodBlockContent | undefined; + let foundHeader: HeaderParagraph | undefined; + let extractedContents: Array = []; + + while (currentContent = getNextContent()) { + if (foundHeader) { + if (currentContent.kind === "head" && currentContent.level <= foundHeader.level) { + break; } + + extractedContents.push(currentContent); } - if(meaningFullContent && podBuffer != ""){ - podContent += podBuffer; - podBuffer = ""; + if ( + currentContent.kind === "head" + && currentContent.contents.match(new RegExp(`^\\s*(\\$.*->)?${symbolName}(\\(.*\\))?\\b`)) + ) { + foundHeader = currentContent; + extractedContents.push(currentContent); } } - - markdown += convertPODToMarkdown(podContent); - return markdown; + if (extractedContents.length === 0) { + return; + } + + return { + kind: "poddocument", + blocks: [ + { + kind: 
"podblock", + paragraphs: extractedContents, + }, + ], + }; } +export async function getPod( + elem: PerlElem, + perlDoc: PerlDocument, + modMap: Map +): Promise { + let symbolName: string | undefined; + + switch (elem.type) { + case PerlSymbolKind.Module: + case PerlSymbolKind.Package: + break; + case PerlSymbolKind.ImportedSub: + case PerlSymbolKind.Inherited: + case PerlSymbolKind.PathedField: + case PerlSymbolKind.LocalSub: + case PerlSymbolKind.LocalMethod: + symbolName = elem.name.replace(/^[\w:]+::(\w+)$/, "$1"); // Remove package + break; + default: + return; + } + + // File may not exist - return nothing if it doesn't. + const absolutePath = await resolvePathForDoc(elem, perlDoc, modMap); + + if (!absolutePath) { + return; + } + + let fileContents: string; + + try { + fileContents = await fs.promises.readFile(absolutePath, "utf8"); + } catch { + return; + } + + if (symbolName) { + let quickSearchMarkdown = quickSearchByComment(symbolName, fileContents); + if (quickSearchMarkdown) { + return quickSearchMarkdown; + } + } + + let parser = new RawPodParser(); + let rawPodDocResult = parser.parse(fileContents); + + if (rawPodDocResult.kind === "parseerror") { + // TODO: log error? --> needs access to settings for nLog + return; + } + + let processor = new PodProcessor(); + let podDocResult = processor.process(rawPodDocResult); + + if (podDocResult.kind === "processingerror") { + // TODO: log error? 
--> needs access to settings for nLog + return; + } + + let podDoc: PodDocument | undefined = podDocResult; + + if (symbolName) { + podDoc = lookupSymbolInPod(symbolName, podDocResult); + } + + if (!podDoc) { + return; + } + + let converter = new PodToMarkdownConverter(); + let markdown = converter.convert(podDoc); + + if (!markdown) { + return; + } + + return markdown; +} async function resolvePathForDoc(elem: PerlElem, perlDoc: PerlDocument, modMap: Map): Promise { let absolutePath = Uri.parse(elem.uri).fsPath; @@ -170,225 +1894,6 @@ async function badFile(fsPath: string): Promise { return false; } -type ConversionState = { - inList: boolean; - inVerbatim: boolean; - inCustomBlock: boolean; - markdown: string; - encoding: string | null; // Currently processed, but not used - waitingForListTitle: boolean; -}; - -const convertPODToMarkdown = (pod: string): string => { - let finalMarkdown: string = ""; - let state: ConversionState = { - inList: false, - inVerbatim: false, - inCustomBlock: false, - markdown: "", - encoding: null, - waitingForListTitle: false, - }; - - const lines = pod.split("\n"); - - for (let i = 0; i < lines.length; i++) { - let line = lines[i]; - - // Check for verbatim blocks first, perhaps ending a prior one - if (shouldConsiderVerbatim(line) || state.inVerbatim) { - state = processVerbatim(line, state); - finalMarkdown += state.markdown; - if (state.inVerbatim) { - // Don't need to keep going if we're still in verbatim mode - continue; - } - } - - // Inline transformations for code, bold, etc. - line = processInlineElements(line); - - // Handling =pod to start documentation - if (line.startsWith("=pod")) { - continue; // Generally, we just skip this. - } - // Headings - else if (line.startsWith("=head")) { - const output = processHeadings(line); - - if(/\w/.test(finalMarkdown) || !/^\n##+ NAME\n$/.test(output)){ - // I find it a waste of space to include the headline "NAME". 
We're short on space in the hover - finalMarkdown += output; - } - } - // List markers and items - else if (line.startsWith("=over") || line.startsWith("=item") || line.startsWith("=back") || state.waitingForListTitle) { - state = processList(line, state); - finalMarkdown += state.markdown; - } - // Custom blocks like =begin and =end - else if (line.startsWith("=begin") || line.startsWith("=end")) { - state = processCustomBlock(line, state); - finalMarkdown += state.markdown; - } - // Format-specific blocks like =for - else if (line.startsWith("=for")) { - finalMarkdown += processFormatSpecificBlock(line); - } - // Encoding - else if (line.startsWith("=encoding")) { - state = processEncoding(line, state); - } - - else if(state.inList){ - if(line){ - finalMarkdown += ` ${line} `; - } - } - // Generic text - else { - finalMarkdown += `${line}\n`; - } - } - - return finalMarkdown; -}; - -const processHeadings = (line: string): string => { - // Extract the heading level from the line. This will be a number from 1-6. - let level = parseInt(line.slice(5, 6)); - level = Math.min(level, 3); // Maximum 6 indentation levels in Markdown - // Ensure that the heading level is valid. - if (isNaN(level) || level < 1 || level > 6) { - return ""; - } - - // Extract the actual text of the heading, which follows the =head command. - const text = line.slice(7).trim(); - - // Convert the heading to its Markdown equivalent. I marked head1 -> ### because I prefer the compact form. - const markdownHeading = `\n##${"#".repeat(level)} ${text}\n`; - - return markdownHeading; -}; - -const processList = (line: string, state: ConversionState): ConversionState => { - let markdown: string = ""; - - // The =over command starts a list. - if (line.startsWith("=over")) { - state.inList = true; - markdown = "\n"; - } - - // The =item command denotes a list item. 
- else if (/^=item \*\s*$/.test(line)) { - state.waitingForListTitle= true; - markdown = ""; - } else if (state.waitingForListTitle && /[^\s]/.test(line)) { - state.waitingForListTitle = false; - markdown = `\n- ${line} \n `; - } - - // The =item command denotes a list item. - else if (line.startsWith("=item")) { - state.inList = true; - - // Remove the '=item' part to get the actual text for the list item. - let listItem = line.substring(6).trim(); - if (listItem.startsWith("* ")) // Doubled up list identifiers - listItem = listItem.replace("*", ""); - markdown = `\n- ${listItem} \n `; // Unordered list - } - // The =back command ends the list. - else if (line.startsWith("=back")) { - state.inList = false; - markdown = "\n"; - } - - return { - ...state, - markdown, - }; -}; - -const processCustomBlock = (line: string, state: ConversionState): ConversionState => { - let markdown = ""; - - // =begin starts a custom block - if (line.startsWith("=begin")) { - // Extract the format following =begin - const format = line.slice(7).trim(); - state.inCustomBlock = true; - - // Choose Markdown representation based on the format - switch (format) { - case "code": - markdown = "```perl\n"; - break; - // Add cases for other formats as needed - default: - markdown = `\n`; - break; - } - } - // =end ends the custom block - else if (line.startsWith("=end")) { - // Extract the format following =end - const format = line.slice(5).trim(); - state.inCustomBlock = false; - - // Close the Markdown representation - switch (format) { - case "code": - markdown = "```\n"; - break; - // Add cases for other formats as needed - default: - markdown = `\n`; - break; - } - } - - return { - ...state, - markdown, - }; -}; - -const processFormatSpecificBlock = (line: string): string => { - // The `=for` command itself is followed by the format and then the text. - const parts = line.split(" ").slice(1); - - if (parts.length < 2) { - return ""; - } - - // Extract the format and the actual text. 
- const format = parts[0].trim(); - const text = parts.slice(1).join(" ").trim(); - - // Choose the Markdown representation based on the format. - let markdown = ""; - switch (format) { - case "text": - // Plain text, just add it. - markdown = `${text}\n`; - break; - case "html": - // If it's HTML, encapsulate it within comments for safety. - markdown = `\n`; - break; - // Add more cases as you find the need for other specific formats. - default: - // For unsupported or custom formats, wrap it in a comment. - markdown = `\n`; - break; - } - - return markdown; -}; - // Mapping backticks to the Unicode non-character U+FFFF which is not allowed to appear in text const tempPlaceholder = '\uFFFF'; @@ -479,54 +1984,3 @@ const convertE = (content: string): string => { } } }; - -// Determine if the line should start a verbatim text block -const shouldConsiderVerbatim = (line: string): boolean => { - // A verbatim block starts with a whitespace but isn't part of a list - return /^\s+/.test(line); -}; - -// Process verbatim text blocks -const processVerbatim = (line: string, state: ConversionState): ConversionState => { - let markdown = ""; - if (/^\s+/.test(line)) { - // If this is the start of a new verbatim block, add Markdown code fence - if (!state.inVerbatim) { - markdown += "\n```\n"; - } - state.inVerbatim = true; - - // Trim some starting whitespace and add the line to the block - // Most pod code has 4 spaces or a tab, but I find 2 space indents most readable in the space constrained pop-up - markdown += line.replace(/^(?:\s{4}|\t)/, " ") + "\n"; - } - // } else if(/^\s+/.test(line)){ - // // Verbatim blocks in lists are tricky. 
Let's just do one line at a time for now so we don't need to keep track of indentation - // markdown = "```\n" + line + "```\n"; - // state.isLineVerbatim = true; - // } - else if (state.inVerbatim) { - // This line ends the verbatim block - state.inVerbatim = false; - markdown += "```\n"; // End the Markdown code fence - } - - return { - ...state, - markdown, - }; -}; - -const processEncoding = (line: string, state: ConversionState): ConversionState => { - // Extract the encoding type from the line - const encodingType = line.split(" ")[1]?.trim(); - - if (encodingType) { - return { - ...state, - encoding: encodingType, - }; - } - - return state; -}; From e37d68fb09242502b745b0b2610c158e98f570a3 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:42:38 +0100 Subject: [PATCH 05/20] server: pod: test: add unit tests for POD parsing & Markdown conversion These unit tests ensure that changes to the POD parser, processor and markdown converter don't cause any regressions in the resulting output. Furthermore, these tests also test for certain quirks and peculiarities that this specific POD parser implementation allows, which aren't allowed per the official spec. The tests can be run from the `server/` directory by running `npm ci` followed by `npm test`. The latter command is added by this commit. Signed-off-by: Max R. Carrara --- server/jest.config.ts | 17 + server/package.json | 12 +- server/src/pod.test.ts | 3178 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 3205 insertions(+), 2 deletions(-) create mode 100644 server/jest.config.ts create mode 100644 server/src/pod.test.ts diff --git a/server/jest.config.ts b/server/jest.config.ts new file mode 100644 index 0000000..3d4f68e --- /dev/null +++ b/server/jest.config.ts @@ -0,0 +1,17 @@ +import type { JestConfigWithTsJest } from 'ts-jest' + +const jestConfig: JestConfigWithTsJest = { + // [...] 
+ transform: { + // '^.+\\.[tj]sx?$' to process ts,js,tsx,jsx with `ts-jest` + // '^.+\\.m?[tj]sx?$' to process ts,js,tsx,jsx,mts,mjs,mtsx,mjsx with `ts-jest` + '^.+\\.tsx?$': [ + 'ts-jest', + { + // ts-jest configuration goes here + }, + ], + }, +} + +export default jestConfig; diff --git a/server/package.json b/server/package.json index 70c8fd9..81413bb 100644 --- a/server/package.json +++ b/server/package.json @@ -20,12 +20,20 @@ "vscode-textmate": "^9.0.0", "vscode-oniguruma": "^2.0.1" }, - "scripts": {}, + "scripts": { + "test": "jest" + }, "main": "./src/dist/serverMain.js", "bin": { "perlnavigator": "./bin/perlnavigator" }, - "devDependencies": {}, + "devDependencies": { + "@types/jest": "^29.5.12", + "jest": "^29.7.0", + "ts-jest": "^29.2.4", + "ts-node": "^10.9.2", + "typescript": "^5.5.4" + }, "keywords": [ "perl", "lsp", diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts new file mode 100644 index 0000000..dea27ec --- /dev/null +++ b/server/src/pod.test.ts @@ -0,0 +1,3178 @@ +import { + HeaderLevel, + OrdinaryParagraph, + PodBlockContent, + PodDocument, + PodParagraph, + RawPodParser, + PodProcessingError, + PodProcessor, + RawPodDocument, + VerbatimParagraph, + PodToMarkdownConverter, + RawPodParseError, +} from "./pod"; + +// Used to return errors that are otherwise logged. 
+const podToMd = (fileContents: string): string | RawPodParseError | PodProcessingError => { + const parser = new RawPodParser(); + const processor = new PodProcessor(); + const converter = new PodToMarkdownConverter(); + + let parseRes = parser.parse(fileContents); + + if (parseRes.kind === "parseerror") { + return parseRes; + } + + let processRes = processor.process(parseRes); + + if (processRes.kind === "processingerror") { + return processRes; + } + + return converter.convert(processRes); +}; + + +describe("basic parser and processor tests", () => { + const parser = new RawPodParser(); + const processor = new PodProcessor(); + + test("empty file returns empty document", () => { + const fileContents = ""; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("single =pod ... =cut region", () => { + const fileContents = `\ +=pod + +=cut +`; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("repeated =pod ... =cut regions with Perl", () => { + const fileContents = `\ +=pod + +=cut + +# This is a Perl comment and doesn't get parsed. 
+sub foo { + my ($bar, $baz) = @_; + + die "baz didn't bar" if !defined($baz->($bar)); + + return "foo $bar"; +} + +=pod + +=cut + +# =pod +# +# This should not get parsed +# +# =cut + +=pod And this here +gets ignored. + +=cut This here +as well. +`; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + { + kind: "rawpodblock", + paragraphs: [], + }, + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [], + }, + { + kind: "podblock", + paragraphs: [], + }, + { + kind: "podblock", + paragraphs: [], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("repeated =pod command", () => { + const fileContents = `\ +=pod + +=pod + +=cut +`; + + const expected: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; + + expect(parser.parse(fileContents)).toMatchObject(expected); + }); + + test("repeated =cut command", () => { + const fileContents = `\ +=pod + +=cut + +=cut +`; + + const expected: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; + + expect(parser.parse(fileContents)).toMatchObject(expected); + }); + + test("unclosed pod block", () => { + const fileContents = `\ +=pod`; + + const expected: RawPodParseError = { + kind: "parseerror", + lineNo: 1, + } as RawPodParseError; // cast in order to omit matching on message + + expect(parser.parse(fileContents)).toMatchObject(expected); + }); + + test("document with ordinary paragraph", () => { + const fileContents = `\ +=pod + +This is an ordinary paragraph. 
+ +=cut +`; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [ + { + kind: "ordinary", + lines: ["This is an ordinary paragraph."], + }, + ], + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "ordinary", + lines: ["This is an ordinary paragraph."], + }, + ], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("document with verbatim paragraph", () => { + const fileContents = `\ +=pod + + This is a verbatim paragraph. Notice the space. + +=cut +`; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [ + { + kind: "verbatim", + lines: [" This is a verbatim paragraph. Notice the space."], + }, + ], + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "verbatim", + lines: [" This is a verbatim paragraph. Notice the space."], + }, + ], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("document with ordinary and verbatim paragraphs", () => { + const fileContents = `\ +=pod + +This is an ordinary paragraph. It spans a single line. + + This is a verbatim paragraph. It spans a single line. + +This is an ordinary paragraph. +It spans two... +\tNo, three lines! + + This is a verbatim paragraph. +It spans two... +\tNo, three lines! +Actually, four. Sorry. + +=cut +`; + + const paragraphs: Array = [ + { + kind: "ordinary", + lines: [ + "This is an ordinary paragraph. 
It spans a single line.", + ], + }, + { + kind: "verbatim", + lines: [ + " This is a verbatim paragraph. It spans a single line.", + ], + }, + { + kind: "ordinary", + lines: [ + "This is an ordinary paragraph.", + "It spans two...", + "\tNo, three lines!", + ], + }, + { + kind: "verbatim", + lines: [ + " This is a verbatim paragraph.", + "It spans two...", + "\tNo, three lines!", + "Actually, four. Sorry.", + ], + }, + ]; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: paragraphs, + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("document with multiple regions and various paragraphs", () => { + const fileContents = `\ +=pod + +=head1 HEAD ONE + + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + + + +=head2 HEAD TWO + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + +=head3 HEAD +THREE + + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=head4 HEAD +F +O +U +R + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=head5 HEAD FIVE + +=over + +=item * + + + + + +Lorem Ipsum. + +=item * + +Dolor sit amet. + +=item +* + +Consectetur adipiscing elit. + +=back + +=head6 HEAD SIX + +=over 3.5 + +=back + +=over 42 + + +=item Morbi ut iaculis orci. Praesent +vehicula risus sed leo commodo, sit amet +laoreet dolor consectetur. + + +=back + +=over 0 + +=back + + + +=head7 UNKNOWN COMMAND PARAGRAPH + + + +=cut + +# This is Perl and is ignored by the parser. 
+sub foobar { + my ($foo, $bar) = @_; + + return "$foo $bar"; +} + + +=pod + +=encoding utf8 + +=begin foo + + +=end foo + +=begin bar + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + + + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=end bar + +=begin :baz some parameter stuff + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=end :baz + +=for comment This is a comment. + +=for comment This is +a +multiline +comment. + +=cut +`; + + const firstParagraphsRaw: Array = [ + { + kind: "head", + level: HeaderLevel.One, + contents: "HEAD ONE", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Two, + contents: "HEAD TWO", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Three, + contents: "HEAD THREE", + }, + { + kind: "verbatim", + lines: [" Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Four, + contents: "HEAD F O U R", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Five, + contents: "HEAD FIVE", + }, + { + kind: "over", + level: 4, + }, + { + kind: "unordereditem", + }, + { + kind: "ordinary", + lines: ["Lorem Ipsum."], + }, + { + kind: "unordereditem", + }, + { + kind: "ordinary", + lines: ["Dolor sit amet."], + }, + { + kind: "unordereditem", + }, + { + kind: "ordinary", + lines: ["Consectetur adipiscing elit."], + }, + { + kind: "back", + }, + { + kind: "head", + level: HeaderLevel.Six, + contents: "HEAD SIX", + }, + { + kind: "over", + level: 3.5, + }, + { + kind: "back", + }, + { + kind: "over", + level: 42, + }, + { + kind: "unordereditem", + lines: [ + "Morbi ut iaculis orci. 
Praesent", + "vehicula risus sed leo commodo, sit amet", + "laoreet dolor consectetur.", + ], + }, + { + kind: "back", + }, + { + kind: "over", + level: 4, + }, + { + kind: "back", + }, + { + kind: "unknown", + cmd: "head7", + contents: "UNKNOWN COMMAND PARAGRAPH", + }, + ]; + + const secondParagraphsRaw: Array = [ + { + kind: "encoding", + name: "utf8", + }, + { + kind: "begin", + formatname: "foo", + parameter: "", + }, + { + kind: "end", + formatname: "foo", + }, + { + kind: "begin", + formatname: "bar", + parameter: "", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "end", + formatname: "bar", + }, + { + kind: "begin", + formatname: ":baz", + parameter: "some parameter stuff", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "end", + formatname: ":baz", + }, + { + kind: "for", + formatname: "comment", + lines: ["This is a comment."], + }, + { + kind: "for", + formatname: "comment", + lines: [ + "This is", + "a", + "multiline", + "comment.", + ], + }, + ]; + + const firstParagraphsProcessed: Array = [ + { + kind: "head", + level: HeaderLevel.One, + contents: "HEAD ONE", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Two, + contents: "HEAD TWO", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Three, + contents: "HEAD THREE", + }, + { + kind: "verbatim", + lines: [" Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Four, + contents: "HEAD F O U R", + }, + { + kind: "ordinary", + 
lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Five, + contents: "HEAD FIVE", + }, + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "unordereditem", + }, + { + kind: "ordinary", + lines: ["Lorem Ipsum."], + }, + { + kind: "unordereditem", + }, + { + kind: "ordinary", + lines: ["Dolor sit amet."], + }, + { + kind: "unordereditem", + }, + { + kind: "ordinary", + lines: ["Consectetur adipiscing elit."], + }, + ], + }, + { + kind: "head", + level: HeaderLevel.Six, + contents: "HEAD SIX", + }, + { + kind: "overblock", + level: 3.5, + paragraphs: [], + }, + { + kind: "overblock", + level: 42, + paragraphs: [ + { + kind: "unordereditem", + lines: [ + "Morbi ut iaculis orci. Praesent", + "vehicula risus sed leo commodo, sit amet", + "laoreet dolor consectetur.", + ], + }, + ], + }, + { + kind: "overblock", + level: 4, + paragraphs: [], + }, + // NOTE: unknown command paragraph is ignored and therefore not included here + ]; + + const secondParagraphsProcessed: Array = [ + // NOTE: encoding command paragraph is ignored and therefore not included here + { + kind: "datablock", + formatname: "foo", + parameter: "", + paragraphs: [], + }, + { + kind: "datablock", + formatname: "bar", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: [ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + "", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + "", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + ], + }, + ], + }, + { + kind: "normaldatablock", + formatname: ":baz", + parameter: "some parameter stuff", + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + ], + }, + { + kind: "datablock", + formatname: "comment", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: ["This is a comment."], + }, + ], + }, + { + kind: "datablock", + formatname: "comment", + parameter: "", + 
paragraphs: [ + { + kind: "data", + lines: [ + "This is", + "a", + "multiline", + "comment.", + ], + }, + ], + }, + ]; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: firstParagraphsRaw, + }, + { + kind: "rawpodblock", + paragraphs: secondParagraphsRaw, + } + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: firstParagraphsProcessed, + }, + { + kind: "podblock", + paragraphs: secondParagraphsProcessed, + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); +}); + + +describe("complex POD processing cases", () => { + const parser = new RawPodParser(); + const processor = new PodProcessor(); + + // We forcibly omit the `message` property here so the object matcher ignores it. + const processingError = { kind: "processingerror" } as PodProcessingError; + + test("unclosed data block", () => { + const fileContents = `\ +=pod + +=begin foo + +=cut +`; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + }); + + test("unclosed normal data block", () => { + const fileContents = `\ +=pod + +=begin :foo + +=cut +`; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + }); + + test("unclosed over block", () => { + const fileContents = `\ +=pod + +=over 42 + +=cut +`; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + }); + + test("over blocks with invalid indent levels", () => { + const fileContents = `\ +=pod + +=over 0 + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. 
+ +=back + +=over -1 + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=back + +=cut +`; + + const paragraphs: Array = [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + ], + }, + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + + }) + + test("data block with ordinary and verbatim paragraphs", () => { + const fileContents = `\ +=pod + +=begin foo bar + +Ordinary. + + Verbatim. + +Ordinary. +But longer. + +\tVerbatim. +But longer. + +=end foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "datablock", + formatname: "foo", + parameter: "bar", + paragraphs: [ + { + kind: "data", + lines: [ + "Ordinary.", + "", + " Verbatim.", + "", + "Ordinary.", + "But longer.", + "", + "\tVerbatim.", + "But longer.", + ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("normal data block with ordinary and verbatim paragraphs", () => { + const fileContents = `\ +=pod + +=begin :foo bar + +Ordinary. + + Verbatim. + +Ordinary. +But longer. + +\tVerbatim. +But longer. 
+ +=end :foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "normaldatablock", + formatname: ":foo", + parameter: "bar", + paragraphs: [ + { + kind: "ordinary", + lines: ["Ordinary."], + }, + { + kind: "verbatim", + lines: [" Verbatim."], + }, + { + kind: "ordinary", + lines: ["Ordinary.", "But longer."], + }, + { + kind: "verbatim", + lines: ["\tVerbatim.", "But longer."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("data block with command paragraph", () => { + const fileContents = `\ +=pod + +=begin foo + +Ordinary. + + Verbatim. + +=head1 SOME COOL TITLE THAT SHOULDN'T BE HERE + +=end foo + +=cut +`; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + }); + + test("normal data block with command paragraph", () => { + const fileContents = `\ +=pod + +=begin :foo + +Ordinary. + + Verbatim. 
+ +=head1 SOME COOL TITLE THAT CAN ACTUALLY BE HERE + +=end :foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "normaldatablock", + formatname: ":foo", + parameter: "", + paragraphs: [ + { + kind: "ordinary", + lines: ["Ordinary."], + }, + { + kind: "verbatim", + lines: [" Verbatim."], + }, + { + kind: "head", + level: HeaderLevel.One, + contents: "SOME COOL TITLE THAT CAN ACTUALLY BE HERE", + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("over block with header command paragraph", () => { + const fileContents = `\ +=pod + +=over 42 + +=head1 + +=back + +=cut +`; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + }); + + test("double-nested over block", () => { + const fileContents = `\ +=pod + +=over + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=over + +=item * + +Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor. + +=item * + +Pellentesque elementum luctus urna, et dapibus est faucibus eu. + +=back + +Mauris ut arcu ipsum. 
+ +=back + +=cut +`; + + const paragraphs: Array = [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "unordereditem", + }, + { + kind: "ordinary", + lines: ["Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor."], + }, + { + kind: "unordereditem", + }, + { + kind: "ordinary", + lines: ["Pellentesque elementum luctus urna, et dapibus est faucibus eu."], + }, + ], + }, + { + kind: "ordinary", + lines: ["Mauris ut arcu ipsum."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("deeply nested over block", () => { + const fileContents = `\ +=pod + +=over + +=over + +=over + +=over + +=over + +=over + +=over + +=over + +=over + +=over + +I know this looks weird, but this is still valid POD. 
+ +=back + +=back + +=back + +=back + +=back + +=back + +=back + +=back + +=back + +=back + +=cut +`; + + const paragraphs: Array = [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "ordinary", + lines: ["I know this looks weird, but this is still valid POD."], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("double-nested data block", () => { + const fileContents = `\ +=pod + +=begin foo + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=begin bar + +Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor. + +Pellentesque elementum luctus urna, et dapibus est faucibus eu. + +=end bar + +Mauris ut arcu ipsum. 
+ +=end foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "datablock", + formatname: "foo", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "datablock", + formatname: "bar", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: [ + "Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor.", + "", + "Pellentesque elementum luctus urna, et dapibus est faucibus eu.", + ], + }, + ], + }, + { + kind: "data", + lines: ["Mauris ut arcu ipsum."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("deeply nested data block", () => { + const fileContents = `\ +=pod + +=begin one + +=begin two + +=begin three + +=begin four + +=begin five + +=begin six + +=begin seven + +=begin eight + +=begin nine + +=begin ten + +I know this looks weird, but this is still valid POD. 
+ +=end ten + +=end nine + +=end eight + +=end seven + +=end six + +=end five + +=end four + +=end three + +=end two + +=end one + +=cut +`; + + const paragraphs: Array = [ + { + kind: "datablock", + formatname: "one", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "two", + parameter: "", + paragraphs: [ + + { + kind: "datablock", + formatname: "three", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "four", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "five", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "six", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "seven", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "eight", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "nine", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "ten", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: ["I know this looks weird, but this is still valid POD."], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("double-nested normal data block", () => { + const fileContents = `\ +=pod + +=begin :foo + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=begin :bar + +Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor. + +Pellentesque elementum luctus urna, et dapibus est faucibus eu. + +=end :bar + +Mauris ut arcu ipsum. 
+ +=end :foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "normaldatablock", + formatname: ":foo", + parameter: "", + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "normaldatablock", + formatname: ":bar", + parameter: "", + paragraphs: [ + { + kind: "ordinary", + lines: ["Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor."], + }, + { + kind: "ordinary", + lines: ["Pellentesque elementum luctus urna, et dapibus est faucibus eu."], + }, + ], + }, + { + kind: "ordinary", + lines: ["Mauris ut arcu ipsum."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("deeply nested normal data block", () => { + const fileContents = `\ +=pod + +=begin :one + +=begin :two + +=begin :three + +=begin :four + +=begin :five + +=begin :six + +=begin :seven + +=begin :eight + +=begin :nine + +=begin :ten + +I know this looks weird, but this is still valid POD. 
+ +=end :ten + +=end :nine + +=end :eight + +=end :seven + +=end :six + +=end :five + +=end :four + +=end :three + +=end :two + +=end :one + +=cut +`; + + const paragraphs: Array = [ + { + kind: "normaldatablock", + formatname: ":one", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":two", + parameter: "", + paragraphs: [ + + { + kind: "normaldatablock", + formatname: ":three", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":four", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":five", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":six", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":seven", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":eight", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":nine", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":ten", + parameter: "", + paragraphs: [ + { + kind: "ordinary", + lines: ["I know this looks weird, but this is still valid POD."], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); +}); + +describe("pod to markdown conversion tests", () => { + test("no pod block results in no markdown", () => { + const fileContents = `\ +# This isn't getting parsed. 
+sub foobar : prototype($) { + my ($baz) = @_; + + return "baz: $baz"; +} +`; + const expected = ""; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("empty pod block results in no markdown", () => { + const fileContents = `\ +=pod + +=cut +`; + + const expected = ""; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("ordinary paragraphs to markdown paragraphs", () => { + const fileContents = `\ +=pod + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. +Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + +Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus +arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac +faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat +mattis lectus dictum ultricies. + +=cut +`; + + const expected = `\ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. +Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + +Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus +arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac +faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat +mattis lectus dictum ultricies. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("consecutive verbatim paragraphs to single markdown code block", () => { + const fileContents = `\ +=pod + + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. +Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + + Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. 
Maecenas dapibus +arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac +faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat +mattis lectus dictum ultricies. + +=cut +`; + + const expected = `\ +\`\`\` + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. +Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + + Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus +arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac +faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat +mattis lectus dictum ultricies. +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("consecutive verbatim paragraphs with indentation to single markdown block", () => { + const fileContents = `\ +=pod + + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. + Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + + Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus + arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac + faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat + mattis lectus dictum ultricies. + +=cut`; + + const expected = `\ +\`\`\` + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. + Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + + Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus + arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac + faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat + mattis lectus dictum ultricies. 
+\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("consecutive verbatim paragraphs with deep indentation to single markdown block", () => { + const fileContents = `\ +=pod + + async function getWorkspaceFoldersSafe(): Promise { + try { + const workspaceFolders = await connection.workspace.getWorkspaceFolders(); + if (!workspaceFolders) { + return []; + } else { + return workspaceFolders; + } + } catch (error) { + return []; + } + } + + function sendDiags(params: PublishDiagnosticsParams): void { + // Before sending new diagnostics, check if the file is still open. + if (documents.get(params.uri)) { + connection.sendDiagnostics(params); + } else { + connection.sendDiagnostics({ uri: params.uri, diagnostics: [] }); + } + } + +=cut`; + + const expected = `\ +\`\`\` + async function getWorkspaceFoldersSafe(): Promise { + try { + const workspaceFolders = await connection.workspace.getWorkspaceFolders(); + if (!workspaceFolders) { + return []; + } else { + return workspaceFolders; + } + } catch (error) { + return []; + } + } + + function sendDiags(params: PublishDiagnosticsParams): void { + // Before sending new diagnostics, check if the file is still open. + if (documents.get(params.uri)) { + connection.sendDiagnostics(params); + } else { + connection.sendDiagnostics({ uri: params.uri, diagnostics: [] }); + } + } +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("mixed verbatim and ordinary paragraphs", () => { + const fileContents = `\ +=pod + +Lorem ipsum dolor sit amet: + + Consectetur adipiscing elit. + +Integer purus nisi: + + Egestas et imperdiet sit amet, interdum ut nisl. + + Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + +Vestibulum vel diam venenatis. +Feugiat ipsum nec. + +Aliquam erat: + + Maecenas dapibus arcu odio, ac dictum mauris cursus quis. + + Donec facilisis ex at nisi dictum, ac faucibus est elementum. 
+ + Mauris sit amet pretium lacus. Nunc sollicitudin erat + mattis lectus dictum ultricies. + +=cut +`; + + const expected = `\ +Lorem ipsum dolor sit amet: + +\`\`\` + Consectetur adipiscing elit. +\`\`\` + +Integer purus nisi: + +\`\`\` + Egestas et imperdiet sit amet, interdum ut nisl. + + Sed fringilla placerat nulla, et viverra purus ultricies sit amet. +\`\`\` + +Vestibulum vel diam venenatis. +Feugiat ipsum nec. + +Aliquam erat: + +\`\`\` + Maecenas dapibus arcu odio, ac dictum mauris cursus quis. + + Donec facilisis ex at nisi dictum, ac faucibus est elementum. + + Mauris sit amet pretium lacus. Nunc sollicitudin erat + mattis lectus dictum ultricies. +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("pod headers to markdown headers", () => { + const fileContents = `\ +=pod + +=head1 HEAD ONE + +=head2 HEAD TWO + +=head3 HEAD THREE + +=head4 HEAD FOUR + +=head5 HEAD FIVE + +=head6 HEAD SIX + +=head7 IGNORED HEADER, NOT CONVERTED :) + +=cut +`; + + const expected = `\ +# HEAD ONE + +## HEAD TWO + +### HEAD THREE + +#### HEAD FOUR + +##### HEAD FIVE + +###### HEAD SIX +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); +}); + +// NOTE: POD doesn't allow many of the following list cases and places restrictions +// on which kinds of consecutive `=item` paragraphs are allowed, for example. +// +// We're being explicitly lax here and don't conform to the spec for simplicity's +// sake. Being 100% compliant isn't really necessary anyways, because this isn't +// supposed to be a full-fledged POD-to-$FORMAT converter; it should just be sufficient +// for displaying hover documentation. 
+// +// See `man perlpodspec` or this page for more information: +// https://perldoc.perl.org/perlpodspec#About-=over...=back-Regions +describe("pod lists to markdown lists", () => { + const parser = new RawPodParser(); + const processor = new PodProcessor(); + const converter = new PodToMarkdownConverter(); + + const podToMd = (fileContents: string): string | RawPodParseError | PodProcessingError => { + let parseRes = parser.parse(fileContents); + + if (parseRes.kind === "parseerror") { + return parseRes; + } + + let processRes = processor.process(parseRes); + + if (processRes.kind === "processingerror") { + return processRes; + } + + return converter.convert(processRes); + }; + + // The POD spec doesn't really specify whether `=item` paragraphs are + // allowed outside of `=over ... =back` blocks or not, so we'll just allow + // them. + test("freestanding pod list to markdown list", () => { + const fileContents = `\ +=pod + +=head1 Unordered List + +=item * + +Foo. + +=item + +Bar. + +=item * + +Baz. + +=head1 Ordered List + +=item 1. + +Foo. + +=item 2. + +Bar. + +=item 3. + +Baz. + +=head1 Unordered List From Items With Text + +=item Foo. + +=item Bar. + +=item Baz. + +=cut`; + + const expected = `\ +# Unordered List + +- Foo. +- Bar. +- Baz. + +# Ordered List + +1. Foo. +2. Bar. +3. Baz. + +# Unordered List From Items With Text + +- Foo. +- Bar. +- Baz. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("pod list in over block to indented markdown list", () => { + const fileContents = `\ +=pod + +=head1 Nested Lists + +=over + +=item * + +Foo. + +=item + +Bar. + +=over + +=item + +Baz. + +=item * + +Qux. + +=back + +=back + +=over + +=item 1. + +Foo. + +=item 2. + +Bar. + +=over + +=item 3. + +Baz. + +=item 4. + +Qux. + +=back + +=back + +=over + +=item Foo. + +=item Bar. + +=over + +=item Baz. + +=item Qux. + +=back + +=back + +=cut`; + + const expected = `\ +# Nested Lists + +- Foo. +- Bar. + - Baz. + - Qux. 
+ +1. Foo. +2. Bar. + 3. Baz. + 4. Qux. + +- Foo. +- Bar. + - Baz. + - Qux. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("ordered pod lists to ordered markdown lists", () => { + const fileContents = `\ +=pod + +=over 2 + +=item 1. + +This list is ordered. + +=item 2. + +According to the spec, each ordered list must start at number 1. + +=item 3. + +... and also precede in order, without skipping a number. + +=item 4 + +Everything's fine here. We may skip (forget) the dot. + +=item 5. + +Multiple +lines +are + indented +correctly. + +=back + +=over 2 + +=item 42. + +However, we avoid enforcing this ordering, because it makes things easier. + +=item 666 + +We are beyond feeble ordering. + +=item 100. + +Beholden to none. + +=back + +=cut +`; + + const expected = `\ +1. This list is ordered. +2. According to the spec, each ordered list must start at number 1. +3. ... and also precede in order, without skipping a number. +4. Everything's fine here. We may skip (forget) the dot. +5. Multiple + lines + are + indented + correctly. + +42. However, we avoid enforcing this ordering, because it makes things easier. +666. We are beyond feeble ordering. +100. Beholden to none. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("strange list items", () => { + const fileContents = `\ +=pod + +The POD spec only allows certain command paragraphs to appear in an over-back block. + +=over + +=item + +=over + +But we can nest things, because it all handles the same anyways. + +=back + +=item + + Verbatim paragraphs + are put into a neat + code block in markdown + though. + + That's fine and on spec. + + The code block even has indentation, oh my gosh. + +=item + +=item + +The item above is empty. Shouldn't be possible, but we also allow it. + +=item + +Note: We don't allow headers though. That's on spec. + +=over + +=item + +But it doesn't matter how deep you nest... 
+ +=item + +=over + +=item You can always do weird things that conformant POD doesn't allow. + +=encoding utf-8 + +=item Encodings are ignored, for now. + +=foobar foo +bar +baz + +=item So are unknown command paragraphs. + +=back + +=back + +=back + +=cut`; + + const expected = `\ +The POD spec only allows certain command paragraphs to appear in an over-back block. + +- But we can nest things, because it all handles the same anyways. +- \`\`\` + Verbatim paragraphs + are put into a neat + code block in markdown + though. + + That's fine and on spec. + + The code block even has indentation, oh my gosh. + \`\`\` +- +- The item above is empty. Shouldn't be possible, but we also allow it. +- Note: We don't allow headers though. That's on spec. + - But it doesn't matter how deep you nest... + - - You can always do weird things that conformant POD doesn't allow. + - Encodings are ignored, for now. + - So are unknown command paragraphs. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("mixed list types to mixed markdown lists", () => { + const fileContents = `\ +=pod + +=item Freestanding list items like this one aren't explicitly specified. + +=item * + +So we'll just allow them. + +=item 42. + +We're even throwing in an "ordered" list item here. + +=cut + + + +=pod + +=over 2 + +=item This applies to over-back blocks as well, by the way. + +=item 10. + +We can do whatever we want, because conforming to the spec here would +be needlessly complex. + +=item It's not like +markdown cares either. +(Does it actually, though?) + +=back + +=cut +`; + + const expected = `\ +- Freestanding list items like this one aren't explicitly specified. +- So we'll just allow them. +42. We're even throwing in an "ordered" list item here. +- This applies to over-back blocks as well, by the way. +10. We can do whatever we want, because conforming to the spec here would + be needlessly complex. +- It's not like + markdown cares either. 
+ (Does it actually, though?) +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("single pod list items between paragraphs to markdown", () => { + const fileContents = `\ +=pod + +There should be an empty line after this ordinary paragraph. + +After this one as well. + +=item This item is followed by an empty line. + +Hello, I'm an ordinary paragraph, and I'm followed by an empty line. + +Another one follows after this paragraph. + +=head3 Let's interleave more! + +=item * + +Item followed by empty line. + +Empty line after me. + +=item Item followed by empty line. + +Empty line after me. + +=item 42. + +Item followed by empty line. + +Empty line after me, then EOF. + +=cut +`; + + const expected = `\ +There should be an empty line after this ordinary paragraph. + +After this one as well. + +- This item is followed by an empty line. + +Hello, I'm an ordinary paragraph, and I'm followed by an empty line. + +Another one follows after this paragraph. + +### Let's interleave more! + +- Item followed by empty line. + +Empty line after me. + +- Item followed by empty line. + +Empty line after me. + +42. Item followed by empty line. + +Empty line after me, then EOF. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); +}); + +describe("pod data blocks to markdown", () => { + test("single data block to markdown code block", () => { + const fileContents = `\ +=pod + +=begin some-data ...with extra parameters that get ignored + +Ordinary paragraphs... + + and verbatim paragraphs... + +just get parsed and internally converted to "data paragraphs" (which is +what the spec calls them). + + + +Multiple line breaks aren't preserved, though. Not sure if this matters, +but it makes things simpler. + +=end some-data + +=cut +`; + + const expected = `\ + +Ordinary paragraphs... + + and verbatim paragraphs... 
+ +just get parsed and internally converted to "data paragraphs" (which is +what the spec calls them). + +Multiple line breaks aren't preserved, though. Not sure if this matters, +but it makes things simpler. + +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("HTML data block to markdown HTML code block", () => { + const fileContents = `\ +=pod + +=begin html + + + + Hello World! + + +

Hello World!

+

My purpose is to be a test case. Please free me.

+ + + +=end html + +=cut`; + + const expected = `\ +\`\`\`html + + + Hello World! + + +

Hello World!

+

My purpose is to be a test case. Please free me.

+ + +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("nested data blocks to nested markdown code blocks", () => { + const fileContents = `\ +=pod + +=begin foo + +Yeah, nesting is possible. + +=begin bar + +Because that's what the spec wants. + +And because it wasn't that hard to implement. + +=begin html + + + + +

Even if...

+ + + +=end html + +...this looks really weird. + +=begin html + + + + +

And out of place.

+ + + +=begin html + + + + +

Like genuinely weird.

+ + + +=end html + +=end html + +=end bar + +But hey, we can handle it. + +=end foo + +=cut +`; + + const expected = `\ + +Yeah, nesting is possible. + + +Because that's what the spec wants. + +And because it wasn't that hard to implement. + +\`\`\`html + + + +

Even if...

+ + +\`\`\` + +...this looks really weird. + +\`\`\`html + + + +

And out of place.

+ + +\`\`\` +\`\`\`html + + + +

Like genuinely weird.

+ + +\`\`\` +\`\`\`html +\`\`\` + + + +But hey, we can handle it. + +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("single normal data block to markdown", () => { + const fileContents = `\ +=pod + +=begin :foo + +This stuff in here gets treated as regular POD. + +=head3 Including commands. + +=over 3.5 + +=item Not gonna test this too thoroughly. + +=item 42. + +Because this isn't handled in any special manner. + +=item + +It really isn't. + +=back + + So yeah. The block above doesn't exist in Markdown at all. + + You won't even know it's there. + +=end :foo + +=cut +`; + + const expected = `\ +This stuff in here gets treated as regular POD. + +### Including commands. + +- Not gonna test this too thoroughly. +42. Because this isn't handled in any special manner. +- It really isn't. + +\`\`\` + So yeah. The block above doesn't exist in Markdown at all. + + You won't even know it's there. +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("nested normal data blocks to markdown", () => { + const fileContents = `\ +=pod + +=begin :foo + +=head1 Foo. + +=begin :bar + +=head2 Bar. + +Lorem ipsum dolor sit amet. + +=end :bar + +Consectetur adipiscing elit. + +=end :foo + +=cut +`; + + const expected = `\ +# Foo. + +## Bar. + +Lorem ipsum dolor sit amet. + +Consectetur adipiscing elit. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("nested mixed data blocks to markdown", () => { + const fileContents = `\ +=pod + +=begin data + +This is where things get interesting. + +=begin html + +

Because the spec allows nesting data paragraphs ...

+ +=begin :no-data-here-lolz + +... with non-data data paragraphs. + +=head1 So it's possible to put headers in here, for example. + +=begin inner-data + +Also, you can add more begin-end blocks deeper inside all of this. + +Surprisingly, this wasn't too hard to support. + +=end inner-data + +=end :no-data-here-lolz + +

And then you can just continue with your HTML or something.

+ +=end html + +It's... odd, to say the least. + +=end data + +=cut +`; + + const expected = `\ + +This is where things get interesting. + +\`\`\`html +

Because the spec allows nesting data paragraphs ...

+\`\`\` +... with non-data data paragraphs. + +# So it's possible to put headers in here, for example. + + +Also, you can add more begin-end blocks deeper inside all of this. + +Surprisingly, this wasn't too hard to support. + +\`\`\`html +

And then you can just continue with your HTML or something.

+\`\`\` + +It's... odd, to say the least. + +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); +}); + +describe("markdown inline formatting", () => { + test("pod bold to markdown bold", () => { + const fileContents = `\ +=pod + +=head1 B + +B<< This paragraph is in bold. >> + + B + +=item B<<< This item is in bold. >>> But not here. B<< Here we go again. >> + +=item * + +B B B + +=item 42. + +And B<<<<< this >>>>> one too. + +=cut +`; + + const expected = `\ +# **Bold header.** + +**This paragraph is in bold.** + +\`\`\` + B +\`\`\` + +- **This item is in bold.** But not here. **Here we go again.** +- **So is this one.** **Twice.** **Thrice.** +42. And **this** one too. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("pod italics to markdown bold", () => { + const fileContents = `\ +=pod + +=head1 I
+ +I<< This paragraph is in italics. >> + + I + +=item I<<< This item is in italics. >>> But not here. I<< Here we go again. >> + +=item * + +I I I + +=item 42. + +And I<<<<< this >>>>> one too. + +=cut +`; + + const expected = `\ +# *Header in italics.* + +*This paragraph is in italics.* + +\`\`\` + I +\`\`\` + +- *This item is in italics.* But not here. *Here we go again.* +- *So is this one.* *Twice.* *Thrice.* +42. And *this* one too. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("pod bold italics to markdown bold", () => { + const fileContents = `\ +=pod + +=head1 I> B> + +I<< B<< This paragraph is in bold italics. >> >> + + B> + +=item B<< I<<< This item is in bold italics. >>> >> But not here. B<< I<< Here it is again. >> >> + +=item * + +B>> Not here. B>> + +=item 42. + +And I>> one too. + +=cut +`; + + const expected = `\ +# ***Header in bold italics.*** ***In two different ways.*** + +***This paragraph is in bold italics.*** + +\`\`\` + B> +\`\`\` + +- ***This item is in bold italics.*** But not here. ***Here it is again.*** +- ***So is this one.*** Not here. ***And we're back.*** +42. And ***this*** one too. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("pod inline code to markdown inline code", () => { + const fileContents = `\ +=pod + +=head1 C Doesn't matter C. + +C<< This paragraph is inline code. >> + + C + +=item C<<< This item is inline code. >>> + +=item * + +C But not here. C + +=item 42. + +C C C + +=cut +`; + + const expected = `\ +# \`Headers allow inline code.\` Doesn't matter \`where\`. + +\`This paragraph is inline code.\` + +\`\`\` + C +\`\`\` + +- \`This item is inline code.\` +- \`So is this one.\` But not here. \`Here it's code again.\` +42. 
\`Same goes for this one.\` \`Twice.\` \`Thrice.\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); +}); From 1eeba81599093101010a1ecf46d648e869b75ed1 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:42:51 +0100 Subject: [PATCH 06/20] server: pod: allow omitting `=cut` if EOF is reached .. and update the corresponding test case accordingly. Signed-off-by: Max R. Carrara --- server/src/pod.test.ts | 13 +++++++++---- server/src/pod.ts | 9 +++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts index dea27ec..4c39b56 100644 --- a/server/src/pod.test.ts +++ b/server/src/pod.test.ts @@ -219,10 +219,15 @@ as well. const fileContents = `\ =pod`; - const expected: RawPodParseError = { - kind: "parseerror", - lineNo: 1, - } as RawPodParseError; // cast in order to omit matching on message + const expected: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; expect(parser.parse(fileContents)).toMatchObject(expected); }); diff --git a/server/src/pod.ts b/server/src/pod.ts index 2fbb731..2e6d767 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -365,13 +365,10 @@ export class RawPodParser { this.#currentBlock.paragraphs.push(para); } + // allow file to end without needing a matching =cut if (this.#currentBlock !== undefined) { - const lineNo = this.#currentBlock.lineNo as number; - return { - kind: "parseerror", - lineNo: lineNo, - message: `"=pod ... =cut" region beginning at line ${lineNo} was never closed (missing "=cut")` - }; + this.#parsedBlocks.push(this.#currentBlock); + this.#currentBlock = undefined; } return { From e99e607db676e96bf944e46fa62899f6bd34bf05 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:42:54 +0100 Subject: [PATCH 07/20] server: pod: fix `=for` command without any content causing error .. 
and also add a corresponding unit test. At the same time, if the `=for` paragraph doesn't contain any content (no lines), don't create an empty ordinary or data paragraph during the processing stage. In other words, a `=for` paragraph without any content now always results in an empty `=begin ... =end` block. Signed-off-by: Max R. Carrara --- server/src/pod.test.ts | 30 +++++++++++++++++++++++++++++ server/src/pod.ts | 43 +++++++++++++++++++++++++++++------------- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts index 4c39b56..cf89eae 100644 --- a/server/src/pod.test.ts +++ b/server/src/pod.test.ts @@ -1873,6 +1873,36 @@ I know this looks weird, but this is still valid POD. expect(processor.process(result as RawPodDocument)).toMatchObject(expected); }); + + test("=for command without content", () => { + const fileContents = `\ +=pod + +=for comment + +=cut +`; + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "datablock", + formatname: "comment", + parameter: "", + paragraphs: [], + }, + ], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); }); describe("pod to markdown conversion tests", () => { diff --git a/server/src/pod.ts b/server/src/pod.ts index 2e6d767..82cb4c3 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -666,7 +666,9 @@ export class RawPodParser { ...line.matchAll(/^=for(\s+(?:?[-a-zA-Z0-9_]+)(\s+(?.*))?)?/g) ][0]; if (matchResult !== undefined) { - if (matchResult.groups?.formatname === undefined) { + const formatname = matchResult.groups?.formatname; + + if (formatname === undefined) { return { kind: "parseerror", lineNo: lineNo, @@ -674,7 +676,7 @@ export class RawPodParser { }; } - let contents = matchResult.groups?.contents.trim() || ""; + let contents = (matchResult.groups?.contents || "").trim(); // similar 
to parsing an ordinary or verbatim paragraph let currentLine: string | undefined = contents; @@ -689,7 +691,7 @@ export class RawPodParser { let para: ForParagraph = { kind: "for", lineNo: lineNo, - formatname: matchResult.groups?.formatname?.trim() as string, + formatname: formatname, lines: lines, }; @@ -1244,29 +1246,44 @@ class PodBlockProcessor { #buildDataBlockFromForPara(paragraph: ForParagraph): DataBlock | NormalDataBlock { if (paragraph.formatname.startsWith(":")) { - let innerPara: OrdinaryParagraph = { - kind: "ordinary", - lines: paragraph.lines, - }; + let paragraphs: Array; + + if (paragraph.lines.length === 0) { + paragraphs = []; + } else { + paragraphs = [ + { + kind: "ordinary", + lines: paragraph.lines, + } + ]; + } return { kind: "normaldatablock", formatname: paragraph.formatname, parameter: "", - paragraphs: [innerPara], + paragraphs: paragraphs, }; } - let innerPara: DataParagraph = { - kind: "data", - lines: paragraph.lines, - }; + let paragraphs: Array; + if (paragraph.lines.length === 0) { + paragraphs = []; + } else { + paragraphs = [ + { + kind: "data", + lines: paragraph.lines, + } + ]; + } return { kind: "datablock", formatname: paragraph.formatname, parameter: "", - paragraphs: [innerPara], + paragraphs: paragraphs, }; } } From 95c3ba24c88e10d3d15da2482efa0867f17ceee3 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:42:57 +0100 Subject: [PATCH 08/20] server: pod: allow `=begin` and `=over` blocks to be closed with `=cut` .. and update unit tests accordingly. Note that this obviously prevents these kinds of blocks from spanning across multiple `=pod ... =cut` regions, but this doesn't seem to show up anywhere anyways AFAIK. Signed-off-by: Max R. 
Carrara --- server/src/pod.test.ts | 59 +++++++++++++++++++++++++++++++++++++++--- server/src/pod.ts | 24 +++++++---------- 2 files changed, 65 insertions(+), 18 deletions(-) diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts index cf89eae..849a97a 100644 --- a/server/src/pod.test.ts +++ b/server/src/pod.test.ts @@ -920,6 +920,7 @@ describe("complex POD processing cases", () => { // We forcibly omit the `message` property here so the object matcher ignores it. const processingError = { kind: "processingerror" } as PodProcessingError; + // Spec requires matching =end, but we choose to tolerate this test("unclosed data block", () => { const fileContents = `\ =pod @@ -929,11 +930,29 @@ describe("complex POD processing cases", () => { =cut `; + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "datablock", + formatname: "foo", + paragraphs: [], + parameter: "", + }, + ], + }, + ], + }; + const result = parser.parse(fileContents); - expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); }); + // Spec requires matching =end, but we choose to tolerate this test("unclosed normal data block", () => { const fileContents = `\ =pod @@ -943,11 +962,29 @@ describe("complex POD processing cases", () => { =cut `; + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":foo", + paragraphs: [], + parameter: "", + }, + ], + }, + ], + }; + const result = parser.parse(fileContents); - expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); }); + // Spec requires matching =back, but we choose to tolerate this test("unclosed over block", () => { const fileContents = `\ =pod @@ -957,9 +994,25 @@ 
describe("complex POD processing cases", () => { =cut `; + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "overblock", + level: 42, + paragraphs: [], + }, + ], + }, + ], + }; + const result = parser.parse(fileContents); - expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); }); test("over blocks with invalid indent levels", () => { diff --git a/server/src/pod.ts b/server/src/pod.ts index 82cb4c3..16767b2 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -943,17 +943,15 @@ class PodBlockProcessor { paragraphs: [], }; - let isProcessingBlock = true; + let isProcessingBlock = true; // used to exit the loop from within switch let para: PodParagraph | undefined; while (isProcessingBlock) { para = this.#getNextParagraph(); if (para === undefined) { - return { - kind: "processingerror", - message: 'unexpected end of paragraphs while processing "=over ... =back" block', - }; + isProcessingBlock = false; + break; } switch (para.kind) { @@ -1049,17 +1047,15 @@ class PodBlockProcessor { paragraphs: [], }; - let isProcessingBlock = true; + let isProcessingBlock = true; // used to exit the loop from within switch let para: PodParagraph | undefined; while (isProcessingBlock) { para = this.#getNextParagraph(); if (para === undefined) { - return { - kind: "processingerror", - message: `unexpected end of paragraphs while processing "=begin ${dataBlock.formatname} ... 
=end ${dataBlock.formatname}" block`, - }; + isProcessingBlock = false; + break; } switch (para.kind) { @@ -1148,17 +1144,15 @@ class PodBlockProcessor { paragraphs: [], }; - let isProcessingBlock = true; + let isProcessingBlock = true; // used to exit the loop from within switch let para: PodParagraph | undefined; while (isProcessingBlock) { para = this.#getNextParagraph(); if (para === undefined) { - return { - kind: "processingerror", - message: `unexpected end of paragraphs while processing "=begin ${dataBlock.formatname} ... =end ${dataBlock.formatname}" block`, - }; + isProcessingBlock = false; + break; } switch (para.kind) { From 361442f5ae41d2b3d5d19f388a1a7eda1247177e Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:42:59 +0100 Subject: [PATCH 09/20] server: pod: start markdown header levels at `3` In other words, this restores the original behaviour of e.g. `=head1` in POD becoming `###` in Markdown. The header level is incremented by `2` up until a maximum level of `6` in order to not exceed the maximum header level of Markdown. Unit tests are updated correspondingly. Signed-off-by: Max R. Carrara --- server/src/pod.test.ts | 37 +++++++++++++++++++------------------ server/src/pod.ts | 5 ++++- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts index 849a97a..2da664a 100644 --- a/server/src/pod.test.ts +++ b/server/src/pod.test.ts @@ -2217,6 +2217,7 @@ Aliquam erat: expect(result).toEqual(expected); }); + // headers in markdown start at level 3, but do not exceed level 6 test("pod headers to markdown headers", () => { const fileContents = `\ =pod @@ -2239,15 +2240,15 @@ Aliquam erat: `; const expected = `\ -# HEAD ONE +### HEAD ONE -## HEAD TWO +#### HEAD TWO -### HEAD THREE +##### HEAD THREE -#### HEAD FOUR +###### HEAD FOUR -##### HEAD FIVE +###### HEAD FIVE ###### HEAD SIX `; @@ -2335,19 +2336,19 @@ Baz. 
=cut`; const expected = `\ -# Unordered List +### Unordered List - Foo. - Bar. - Baz. -# Ordered List +### Ordered List 1. Foo. 2. Bar. 3. Baz. -# Unordered List From Items With Text +### Unordered List From Items With Text - Foo. - Bar. @@ -2432,7 +2433,7 @@ Qux. =cut`; const expected = `\ -# Nested Lists +### Nested Lists - Foo. - Bar. @@ -2723,7 +2724,7 @@ Hello, I'm an ordinary paragraph, and I'm followed by an empty line. Another one follows after this paragraph. -### Let's interleave more! +##### Let's interleave more! - Item followed by empty line. @@ -2970,7 +2971,7 @@ It really isn't. const expected = `\ This stuff in here gets treated as regular POD. -### Including commands. +##### Including commands. - Not gonna test this too thoroughly. 42. Because this isn't handled in any special manner. @@ -3012,9 +3013,9 @@ Consectetur adipiscing elit. `; const expected = `\ -# Foo. +### Foo. -## Bar. +#### Bar. Lorem ipsum dolor sit amet. @@ -3074,7 +3075,7 @@ This is where things get interesting. \`\`\` ... with non-data data paragraphs. -# So it's possible to put headers in here, for example. +### So it's possible to put headers in here, for example. Also, you can add more begin-end blocks deeper inside all of this. @@ -3120,7 +3121,7 @@ And B<<<<< this >>>>> one too. `; const expected = `\ -# **Bold header.** +### **Bold header.** **This paragraph is in bold.** @@ -3162,7 +3163,7 @@ And I<<<<< this >>>>> one too. `; const expected = `\ -# *Header in italics.* +### *Header in italics.* *This paragraph is in italics.* @@ -3204,7 +3205,7 @@ And I>> one too. `; const expected = `\ -# ***Header in bold italics.*** ***In two different ways.*** +### ***Header in bold italics.*** ***In two different ways.*** ***This paragraph is in bold italics.*** @@ -3246,7 +3247,7 @@ C C C `; const expected = `\ -# \`Headers allow inline code.\` Doesn't matter \`where\`. +### \`Headers allow inline code.\` Doesn't matter \`where\`. 
\`This paragraph is inline code.\` diff --git a/server/src/pod.ts b/server/src/pod.ts index 16767b2..45f5522 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -1434,8 +1434,11 @@ export class PodToMarkdownConverter { } #convertHeaderPara(headerPara: HeaderParagraph): Array { + // + 2 because we start at an h3 (###) for readability + const level = Math.min(headerPara.level + 2, 6); + return [ - "#".repeat(headerPara.level) + " " + processInlineElements(headerPara.contents) + "#".repeat(level) + " " + processInlineElements(headerPara.contents) ]; } From 38f4bfbe8dc85bf4c407b323fa5b07d99abc8d01 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:43:03 +0100 Subject: [PATCH 10/20] server: pod: fix indentation being converted incorrectly Instead of always omitting the last indentation level of nested lists, the initial level of the first nesting is now unindented. This means that nested `=over ... =back` blocks with the levels `5, 3, 2` are converted to e.g.: - foo (0) - bar (3) - baz (2) .. instead of: - foo (0) - bar (5) - baz (3) Unit tests are updated correspondingly. Signed-off-by: Max R. Carrara --- server/src/pod.test.ts | 65 ++++++++++++++++++++++++++++++++++++++++-- server/src/pod.ts | 13 ++++++--- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts index 2da664a..0a807a0 100644 --- a/server/src/pod.test.ts +++ b/server/src/pod.test.ts @@ -2456,6 +2456,65 @@ Qux. expect(result).toEqual(expected); }); + test("nested lists with varying indentation levels", () => { + const fileContents = `\ +=pod + +=over + +=item * foo + +=over 2 + +=item * bar + +=over 3 + +=item * baz + +=back + +=back + +=back + + +=over + +=item 1. foo + +=over 2 + +=item 2. bar + +=over 3 + +=item 3. baz + +=back + +=back + +=back + +=cut +`; + + const expected = `\ +- foo + - bar + - baz + +1. foo + 2. bar + 3. 
baz +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + test("ordered pod lists to ordered markdown lists", () => { const fileContents = `\ =pod @@ -2613,9 +2672,9 @@ The POD spec only allows certain command paragraphs to appear in an over-back bl - The item above is empty. Shouldn't be possible, but we also allow it. - Note: We don't allow headers though. That's on spec. - But it doesn't matter how deep you nest... - - - You can always do weird things that conformant POD doesn't allow. - - Encodings are ignored, for now. - - So are unknown command paragraphs. + - - You can always do weird things that conformant POD doesn't allow. + - Encodings are ignored, for now. + - So are unknown command paragraphs. `; const result = podToMd(fileContents); diff --git a/server/src/pod.ts b/server/src/pod.ts index 45f5522..ae70548 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -1492,14 +1492,19 @@ export class PodToMarkdownConverter { } #convertOverBlock(block: OverBlock): Array { - const currentIndentLevel: number = Math.round(block.level); - this.#overBlockIndentLevels.push(currentIndentLevel); + const initialIndentLevel: number = this.#overBlockIndentLevels.reduce((a, b) => a + b, 0); + this.#overBlockIndentLevels.push(Math.round(block.level)); const indentList = (arr: Array): Array => { let newArr: Array = []; - const adjustedIndentLevel = this.#overBlockIndentLevels - .reduce((a, b) => a + b, 0) - currentIndentLevel; + let adjustedIndentLevel: number; + if (initialIndentLevel === 0) { + adjustedIndentLevel = 0; + } else { + adjustedIndentLevel = this.#overBlockIndentLevels + .reduce((a, b) => a + b, -initialIndentLevel); + } if (adjustedIndentLevel === 0) { return arr; From d78e521e8b7ebdd54aceb4f767daf7fe644140f2 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:43:10 +0100 Subject: [PATCH 11/20] server: pod: add utility functions for iterators .. 
and use type predicates instead of plain booleans as return types for the `isItem` and `isOverBlockWithItem` functions. Signed-off-by: Max R. Carrara --- server/src/pod.ts | 138 +++++++++++++++++++++++++++++----------------- 1 file changed, 88 insertions(+), 50 deletions(-) diff --git a/server/src/pod.ts b/server/src/pod.ts index ae70548..fc46fab 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -1517,21 +1517,7 @@ export class PodToMarkdownConverter { return newArr; } - const overBlockIter = function* (): Generator { - yield* block.paragraphs; - }; - - const iter = overBlockIter(); - - const getNext = () => { - let { value, done } = iter.next(); - - if (done || value === undefined) { - return; - } - - return value; - }; + const getNext = makeOverBlockIterGetter(block); let lines: Array = this.#convertContentUntilDone(getNext); @@ -1549,21 +1535,7 @@ export class PodToMarkdownConverter { } #convertDataBlock(block: DataBlock): Array { - const dataBlockIter = function* (): Generator { - yield* block.paragraphs; - }; - - const iter = dataBlockIter(); - - const getNext = () => { - let { value, done } = iter.next(); - - if (done || value === undefined) { - return; - } - - return value; - }; + const getNext = makeDataBlockIterGetter(block); let dataStart: string; let dataEnd: string; @@ -1619,21 +1591,7 @@ export class PodToMarkdownConverter { } #convertNormalDataBlock(block: NormalDataBlock): Array { - const normalDataBlockIter = function* (): Generator { - yield* block.paragraphs; - }; - - const iter = normalDataBlockIter(); - - const getNext = () => { - let { value, done } = iter.next(); - - if (done || value === undefined) { - return; - } - - return value; - }; + const getNext = makeNormalDataBlockIterGetter(block); return this.#convertContentUntilDone(getNext); } @@ -1646,16 +1604,14 @@ function ensureLastLineEmpty(list: Array) { } } -function isItem(content: PodBlockContent): boolean { +function isItem(content: PodBlockContent): content is 
UnordererdItemParagraph | OrderedItemParagraph { return ["unordereditem", "ordereditem"].includes(content.kind); } -function isOverBlockWithItem(content: PodBlockContent): boolean { +function isOverBlockWithItem(content: PodBlockContent): content is OverBlock { if (content.kind === "overblock") { const firstBlockContent = content.paragraphs.at(0); - if (firstBlockContent && isItem(firstBlockContent)) { - return true; - } + return firstBlockContent !== undefined && isItem(firstBlockContent); } return false; @@ -1665,6 +1621,88 @@ function tabsToSpaces(line: string, spacesPerTab: number = 4): string { return line.replaceAll("\t", " ".repeat(spacesPerTab)); } +function makePodDocContentIterGetter(podDoc: PodDocument): () => PodBlockContent | undefined { + const podDocContentIter = function*(): Generator { + for (const block of podDoc.blocks) { + yield* block.paragraphs; + } + } + + const iter = podDocContentIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + return getNext; +} + +function makeOverBlockIterGetter(block: OverBlock): () => OverBlockContent | undefined { + const overBlockIter = function*(): Generator { + yield* block.paragraphs; + } + + const iter = overBlockIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + return getNext; +} + +function makeDataBlockIterGetter(block: DataBlock): () => DataBlockContent | undefined { + const dataBlockIter = function*(): Generator { + yield* block.paragraphs; + } + + const iter = dataBlockIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + return getNext; +} + +function makeNormalDataBlockIterGetter(block: NormalDataBlock): () => PodBlockContent | undefined { + const normalDataBlockIter = function*(): Generator { + yield* 
block.paragraphs; + } + + const iter = normalDataBlockIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + return getNext; +} + /** Quick search for leading comments of a very specific form with comment * blocks that preceed a sub (and aren't simply get/set without docs). * From 08887a8bf767b8484ff81c2c66c6be935c4aa473 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:43:12 +0100 Subject: [PATCH 12/20] server: pod: add support for alt. syntax for S, F, X formatting codes Although presumably pretty rare, it doesn't hurt to support them. Signed-off-by: Max R. Carrara --- server/src/pod.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/src/pod.ts b/server/src/pod.ts index fc46fab..1500503 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -1998,12 +1998,15 @@ const processInlineElements = (line: string): string => { // Handle non-breaking spaces (S) line = line.replace(/S<([^<>]+)>/g, "$1"); + line = line.replace(/S<<+\s+(.+?)\s+>+>/g, "$1"); // Handle file names (F), converting to italics line = line.replace(/F<([^<>]+)>/g, "*$1*"); + line = line.replace(/F<<+\s+(.+?)\s+>+>/g, "*$1*"); // Handle index entries (X), ignoring as Markdown doesn't have an index line = line.replace(/X<([^<>]+)>/g, ""); + line = line.replace(/X<<+\s+(.+?)\s+>+>/g, "$1"); return line; }; From 4e5e878c6130f547f66046dde87fcde624e953ea Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:43:13 +0100 Subject: [PATCH 13/20] server: pod: rework symbol lookup This commit adds support for the following: - symbol lookup for `=item` paragraphs, restoring the original behaviour - consecutive `=item` or `=head\d` paragraphs (of the same level) for the same subroutine, method, etc., e.g. 
in order to show an alternative way to call the subroutine, are now displayed as expected - support symbol lookup via index entries (`X<>`), which may be used to e.g. look up an alias of a subroutine To elaborate on the last point, here's a quick example: `File::Copy::move` has an alias called `File::Copy::mv`. The documentation for move contains an index entry for `mv`, so that the docs count for both subroutines: =item move X<move> X<mv> X<rename> The symbol lookup regex will now also match index entries, so if a signature doesn't match, the regex will try to match for an index entry, if there is any. This makes it possible to look up the docstring for `File::Copy::mv`. Also, add documentation for the `lookupSymbolInPod` function. Signed-off-by: Max R. Carrara --- server/src/pod.ts | 149 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 113 insertions(+), 36 deletions(-) diff --git a/server/src/pod.ts b/server/src/pod.ts index 1500503..2bc22cd 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -1734,63 +1734,140 @@ function quickSearchByComment(symbolName: string, fileContent: string): string | return markdown; } +/** Look up a symbol's name in a {@link PodDocument}. + * + * This searches the given POD doc for any `=item` or `=head\d` command paragraph + * that corresponds to the given `symbolName`. + * + * If the matched paragraph is a `=head\d`, returns all paragraphs starting from + * and including the matched header up until either a non-matching `=head\d` + * of the same level *or* a `=head\d` with a higher level is encountered. + * + * If the matched paragraph is an `=item`, returns all paragraphs starting from + * and including the matched item up until either a non-matching `=item` + * *or* the end of the `=item`'s `=over ... =back` block is reached (if any).
+ */ function lookupSymbolInPod(symbolName: string, podDoc: PodDocument): PodDocument | undefined { - const podDocIter = function* ( - doc: PodDocument - ): Generator { - for (const block of doc.blocks) { - for (const content of block.paragraphs) { - yield content; - } - } - } + const symbolRegex = new RegExp( + `(^\\s*(\\$.*->)?${symbolName}(\\(.*\\))?)|(X<${symbolName}>)|(X<<+\\s+${symbolName}\\s+>+>)` + ); - const iter = podDocIter(podDoc); - const getNextContent = () => { - const { value, done } = iter.next(); + let extractedContents = matchHeaderRegionInPod( + symbolRegex, + makePodDocContentIterGetter(podDoc), + ); - if (done || value === undefined) { - return; - } + if (extractedContents.length === 0) { + extractedContents = matchItemRegionInPod( + symbolRegex, + makePodDocContentIterGetter(podDoc), + ); + } - return value; + if (extractedContents.length === 0) { + return; + } + + return { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: extractedContents, + }, + ], }; +} - let currentContent: PodBlockContent | undefined; - let foundHeader: HeaderParagraph | undefined; +function matchHeaderRegionInPod(regex: RegExp, getNext: () => PodBlockContent | undefined): Array { + let currentContent: PodBlockContent | undefined; let extractedContents: Array = []; - while (currentContent = getNextContent()) { + const headerMatchesSymbol = (headerPara: HeaderParagraph) => headerPara.contents.match(regex); + + let foundHeader: HeaderParagraph | undefined; + + while (currentContent = getNext()) { if (foundHeader) { - if (currentContent.kind === "head" && currentContent.level <= foundHeader.level) { - break; + if (currentContent.kind === "head") { + // Next =headN command also matches regex, assume it's an alternative + // signature for the same symbol + if (currentContent.level === foundHeader.level && headerMatchesSymbol(currentContent)) { + extractedContents.push(currentContent); + continue; + } + + if (currentContent.level <= foundHeader.level) { 
+ break; + } } extractedContents.push(currentContent); + continue; } - if ( - currentContent.kind === "head" - && currentContent.contents.match(new RegExp(`^\\s*(\\$.*->)?${symbolName}(\\(.*\\))?\\b`)) - ) { + if (currentContent.kind === "head" && headerMatchesSymbol(currentContent)) { foundHeader = currentContent; extractedContents.push(currentContent); } } - if (extractedContents.length === 0) { - return; + return extractedContents; +} + +function matchItemRegionInPod(regex: RegExp, getNext: () => PodBlockContent | undefined): Array { + let currentContent: PodBlockContent | undefined; + let extractedContents: Array = []; + + const itemMatchesSymbol = (itemPara: UnordererdItemParagraph | OrderedItemParagraph) => { + if (itemPara.lines === undefined) { + return false; + } + + for (const line of itemPara.lines) { + if (line.match(regex)) { + return true; + } + } + + return false; } - return { - kind: "poddocument", - blocks: [ - { - kind: "podblock", - paragraphs: extractedContents, - }, - ], - }; + let foundItem: UnordererdItemParagraph | OrderedItemParagraph | undefined; + + while (currentContent = getNext()) { + if (foundItem) { + if (isItem(currentContent)) { + // Next =item command also matches regex, assume it's an alternative + // signature for the same symbol + if (itemMatchesSymbol(currentContent)) { + extractedContents.push(currentContent); + continue; + } + + break; + } + + extractedContents.push(currentContent); + continue; + } + + switch (currentContent.kind) { + case "unordereditem": + case "ordereditem": + if (itemMatchesSymbol(currentContent)) { + foundItem = currentContent; + extractedContents.push(currentContent); + } + break; + case "overblock": + return matchItemRegionInPod(regex, makeOverBlockIterGetter(currentContent)); + case "normaldatablock": + return matchItemRegionInPod(regex, makeNormalDataBlockIterGetter(currentContent)); + } + } + + return extractedContents; } export async function getPod( From b907278f2a16e6991b14dc51004e1eeeb5ea7099 
Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:43:19 +0100 Subject: [PATCH 14/20] server: pod: format pod document in general and after symbol lookup For all general cases, the `formatPodDoc` function is now used to perform various actions on a processed pod document. At the moment, this only removes `=head1 NAME` paragraphs from the document, as those are kind of unnecessary in hover docs. After a successful symbol lookup, the returned pod document is formatted in the following ways: - `=item` paragraphs of the same indentation level are converted to header paragraphs, which makes the matched symbol appear more nicely. - `=head\d` paragraphs are now "normalized" -- this means that if e.g. a subroutine signature starts at `=head4`, it will be converted to a `=head1` internally, which is then adapted as needed by the markdown converter. The above results in the docs of all looked up symbols being uniformly rendered as `h3` (`###`), with sub-headings being converted to the respective relative level. Signed-off-by: Max R. Carrara --- server/src/pod.ts | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/server/src/pod.ts b/server/src/pod.ts index 2bc22cd..ade8732 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -1870,6 +1870,60 @@ function matchItemRegionInPod(regex: RegExp, getNext: () => PodBlockContent | un return extractedContents; } +function formatPodDocForSymbol(podDoc: PodDocument) { + // use this as default and let converter handle the adjustment + const symbolHeaderLevel: number = 1; + + // technically there should only be one block, but playing it safe here + for (const block of podDoc.blocks) { + block.paragraphs.forEach((para, index) => { + if (isItem(para)) { + const replacementHeaderPara: HeaderParagraph = { + kind: "head", + level: symbolHeaderLevel, + contents: (para.lines ?? 
[]).join(" "), + lineNo: para.lineNo, + }; + + block.paragraphs[index] = replacementHeaderPara; + } + }); + } + + let highestHeaderLevel: number = 6; + + for (const block of podDoc.blocks) { + for (const para of block.paragraphs) { + if (para.kind === "head" && para.level < highestHeaderLevel) { + highestHeaderLevel = para.level; + } + } + } + + // normalize header levels to `symbolHeaderLevel` + for (const block of podDoc.blocks) { + for (const para of block.paragraphs) { + if (para.kind !== "head") { + continue; + } + + para.level = para.level - (highestHeaderLevel - symbolHeaderLevel); + }; + } +} + +function formatPodDoc(podDoc: PodDocument) { + for (const block of podDoc.blocks) { + block.paragraphs = block.paragraphs.filter((para) => { + return !( + para.kind === "head" + && para.level === 1 + && para.contents.trim() === "NAME" + ); + }); + } +} + export async function getPod( elem: PerlElem, perlDoc: PerlDocument, @@ -1934,12 +1988,18 @@ export async function getPod( if (symbolName) { podDoc = lookupSymbolInPod(symbolName, podDocResult); + + if (podDoc) { + formatPodDocForSymbol(podDoc); + } } if (!podDoc) { return; } + formatPodDoc(podDoc); + let converter = new PodToMarkdownConverter(); let markdown = converter.convert(podDoc); From fe7c1e70a050b61981a77edcfaad9f399f40eae7 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Thu, 2 Jan 2025 14:49:20 +0100 Subject: [PATCH 15/20] server: pod: fix symbol lookup edge case regarding list items Some Perl modules in the wild document their subroutines like this: =over =item * $obj->foo() Lorem ipsum odor amet, consectetuer adipiscing elit. Quis a habitasse hendrerit efficitur phasellus lacus. [...] =back While this would render to Markdown as expected, the =item paragraph would still contain no text in terms of how the data is represented. This commit makes it so that if an =item without text is followed by an ordinary paragraph, the latter is melded into the former during the initial parse of the POD document. 
Thus, symbol lookup for this case in particular works again as expected. Test cases are adapted correspondingly. Signed-off-by: Max R. Carrara --- server/src/pod.test.ts | 24 ------- server/src/pod.ts | 152 ++++++++++++++++++++++++++++++++--------- 2 files changed, 118 insertions(+), 58 deletions(-) diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts index 0a807a0..917f3a0 100644 --- a/server/src/pod.test.ts +++ b/server/src/pod.test.ts @@ -582,23 +582,14 @@ comment. }, { kind: "unordereditem", - }, - { - kind: "ordinary", lines: ["Lorem Ipsum."], }, { kind: "unordereditem", - }, - { - kind: "ordinary", lines: ["Dolor sit amet."], }, { kind: "unordereditem", - }, - { - kind: "ordinary", lines: ["Consectetur adipiscing elit."], }, { @@ -758,23 +749,14 @@ comment. paragraphs: [ { kind: "unordereditem", - }, - { - kind: "ordinary", lines: ["Lorem Ipsum."], }, { kind: "unordereditem", - }, - { - kind: "ordinary", lines: ["Dolor sit amet."], }, { kind: "unordereditem", - }, - { - kind: "ordinary", lines: ["Consectetur adipiscing elit."], }, ], @@ -1332,16 +1314,10 @@ Mauris ut arcu ipsum. paragraphs: [ { kind: "unordereditem", - }, - { - kind: "ordinary", lines: ["Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor."], }, { kind: "unordereditem", - }, - { - kind: "ordinary", lines: ["Pellentesque elementum luctus urna, et dapibus est faucibus eu."], }, ], diff --git a/server/src/pod.ts b/server/src/pod.ts index ade8732..bbb61cb 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -277,12 +277,50 @@ export interface PodDocument { blocks: Array; } +/** Represents the iteration over lines of text. + * Doesn't actually conform to the iterator protocol. + * + * Is used as a helper structure for {@link RawPodParser}, allowing the parser + * to backtrack when trying to parse `=item` paragraphs without text followed + * by an ordinary paragraph. 
+ */ +class LinesIterator { + #lines: Array; + #currentLineNo: number; + #savedLineNumbers: Array; + + constructor(contents: string) { + this.#lines = contents.split(/\r?\n/); + this.#currentLineNo = 0; + this.#savedLineNumbers = []; + } + + next(): string | undefined { + if (this.#currentLineNo < this.#lines.length) { + return this.#lines[this.#currentLineNo++]; + } + + return undefined; + } + + currentLineNo(): number { + return this.#currentLineNo; + } + + save() { + this.#savedLineNumbers.push(this.#currentLineNo); + } + + rewind() { + this.#currentLineNo = this.#savedLineNumbers.pop() ?? 0; + } +} + /** Tracks the state for parsing POD content from a file. * See {@link parse} for more information. */ export class RawPodParser { - #lineIter: Generator = this.#makeLineIter([]); - #currentLineNo: number = 0; + #lineIter: LinesIterator = new LinesIterator(""); #currentBlock?: RawPodBlock = undefined; #parsedBlocks: Array = []; @@ -292,17 +330,14 @@ export class RawPodParser { * This is done via the {@link PodProcessor}. 
*/ parse(fileContents: string): RawPodDocument | RawPodParseError { - const lines = fileContents.split(/\r?\n/); - // Reset state - this.#lineIter = this.#makeLineIter(lines); - this.#currentLineNo = 0; + this.#lineIter = new LinesIterator(fileContents); this.#currentBlock = undefined; this.#parsedBlocks = []; let line: string | undefined; while (true) { - line = this.#getNextLine(); + line = this.#lineIter.next(); // EOF if (line === undefined) { @@ -314,7 +349,7 @@ export class RawPodParser { continue; } - if (/^=[a-zA-Z]/.test(line)) { + if (RawPodParser.#isCommandParagraph(line)) { if (line.startsWith("=cut")) { if (this.#currentBlock !== undefined) { this.#parsedBlocks.push(this.#currentBlock); @@ -328,7 +363,11 @@ export class RawPodParser { } if (this.#currentBlock === undefined) { - this.#currentBlock = { kind: "rawpodblock", lineNo: this.#currentLineNo, paragraphs: [] }; + this.#currentBlock = { + kind: "rawpodblock", + lineNo: this.#lineIter.currentLineNo(), + paragraphs: [] + }; } if (line.startsWith("=pod")) { @@ -354,7 +393,7 @@ export class RawPodParser { continue; } - if (/^[ \t]/.test(line)) { + if (RawPodParser.#isVerbatimParagraph(line)) { let para = this.#parseVerbatim(line); this.#currentBlock.paragraphs.push(para); @@ -377,27 +416,23 @@ export class RawPodParser { }; } - *#makeLineIter(lines: string[]) { - yield* lines; + static #isCommandParagraph(line: string): boolean { + return /^=[a-zA-Z]/.test(line); } - #getNextLine(): string | undefined { - let { value, done } = this.#lineIter.next(); - - if (done || value === undefined) { - return; - } - - this.#currentLineNo++; + static #isVerbatimParagraph(line: string): boolean { + return /^[ \t]/.test(line); + } - return value; + static #isOrdinaryParagraph(line: string): boolean { + return !(RawPodParser.#isCommandParagraph(line) || RawPodParser.#isVerbatimParagraph(line)); } #skipUntilEmptyLine(): void { let line: string | undefined; while (true) { - line = this.#getNextLine(); + line = 
this.#lineIter.next(); if (!line) { return; @@ -411,7 +446,7 @@ export class RawPodParser { ): string { let line: string | undefined; - while (line = this.#getNextLine()) { + while (line = this.#lineIter.next()) { if (trimOpts.trimStart && trimOpts.trimEnd) { line = line.trim(); } else if (trimOpts.trimStart) { @@ -452,7 +487,7 @@ export class RawPodParser { */ #tryParseCommand(line: string): PodParagraph | RawPodParseError { line = line.trimEnd(); - const lineNo = this.#currentLineNo; + const lineNo = this.#lineIter.currentLineNo(); let matchResult; @@ -497,8 +532,14 @@ export class RawPodParser { if (text) { this.#appendNextLineUntilEmptyLine(text, { trimStart: true, trimEnd: true }); para.lines = [text]; - } else { - this.#skipUntilEmptyLine(); + + return para; + } + + this.#skipUntilEmptyLine(); + let ordinaryPara = this.#parseNextOrdinaryOrRewind(); + if (ordinaryPara) { + para.lines = ordinaryPara.lines; } return para; @@ -518,8 +559,14 @@ export class RawPodParser { if (text) { this.#appendNextLineUntilEmptyLine(text, { trimStart: true, trimEnd: true }); para.lines = [text]; - } else { - this.#skipUntilEmptyLine(); + + return para; + } + + this.#skipUntilEmptyLine(); + let ordinaryPara = this.#parseNextOrdinaryOrRewind(); + if (ordinaryPara) { + para.lines = ordinaryPara.lines; } return para; @@ -534,7 +581,7 @@ export class RawPodParser { while (currentLine) { lines.push(currentLine.trim()); - currentLine = this.#getNextLine(); + currentLine = this.#lineIter.next(); } let para: UnordererdItemParagraph = { @@ -553,6 +600,10 @@ export class RawPodParser { }; this.#skipUntilEmptyLine(); + let ordinaryPara = this.#parseNextOrdinaryOrRewind(); + if (ordinaryPara) { + para.lines = ordinaryPara.lines; + } return para; } @@ -685,7 +736,7 @@ export class RawPodParser { while (currentLine) { lines.push(currentLine.trimEnd()); - currentLine = this.#getNextLine(); + currentLine = this.#lineIter.next(); } let para: ForParagraph = { @@ -726,7 +777,7 @@ export class 
RawPodParser { */ #parseVerbatim(line: string): VerbatimParagraph { let currentLine: string | undefined = line; - const lineNo = this.#currentLineNo; + const lineNo = this.#lineIter.currentLineNo(); let lines: Array = []; @@ -734,7 +785,7 @@ export class RawPodParser { while (currentLine) { lines.push(currentLine.trimEnd()); - currentLine = this.#getNextLine(); + currentLine = this.#lineIter.next(); } return { @@ -750,7 +801,7 @@ export class RawPodParser { */ #parseOrdinary(line: string): OrdinaryParagraph { let currentLine: string | undefined = line; - const lineNo = this.#currentLineNo; + const lineNo = this.#lineIter.currentLineNo(); let lines: Array = []; @@ -758,7 +809,7 @@ export class RawPodParser { while (currentLine) { lines.push(currentLine); - currentLine = this.#getNextLine(); + currentLine = this.#lineIter.next(); } return { @@ -767,6 +818,39 @@ export class RawPodParser { lines: lines, }; } + + /** Tries to parse the next paragraph as ordinary paragraph. + * If the next paragraph is of some other type, rewinds the internal line + * iterator back to before it started parsing and returns `undefined`. + */ + #parseNextOrdinaryOrRewind(): OrdinaryParagraph | undefined { + this.#lineIter.save(); + + let line: string | undefined; + + // Advance until ordinary paragraph and return that, or rewind + while (true) { + line = this.#lineIter.next(); + + // EOF + if (line === undefined) { + return; + } + + // line is empty + if (line === "") { + continue; + } + + if (RawPodParser.#isOrdinaryParagraph(line)) { + return this.#parseOrdinary(line); + } + + // Encountered something else, so rewind and return + this.#lineIter.rewind(); + return; + } + } } /** Tracks the state for processing a {@link RawPodDocument} into a proper From 782f89f66dc62b6adf4c4b802be27edc69d1a6f3 Mon Sep 17 00:00:00 2001 From: "Max R. 
Carrara" Date: Tue, 7 Jan 2025 11:32:04 +0100 Subject: [PATCH 16/20] server: pod: remove error handling of RawPodParser As suggested in PR #142 [1], get rid of the error handling in the RawPodParser class as part of making the overall POD-to-Markdown conversion process more lax. This only affects =begin, =end and =for command paragraphs, which are now parsed even if there is no format name, despite the spec [2] requiring one. [1]: https://github.com/bscan/PerlNavigator/pull/142#issuecomment-2568327657 [2]: https://perldoc.perl.org/perlpodspec#%22=begin-formatname%22 Signed-off-by: Max R. Carrara --- server/src/pod.test.ts | 13 ++------- server/src/pod.ts | 64 +++++++++--------------------------------- 2 files changed, 15 insertions(+), 62 deletions(-) diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts index 917f3a0..b6f0f5a 100644 --- a/server/src/pod.test.ts +++ b/server/src/pod.test.ts @@ -10,21 +10,16 @@ import { RawPodDocument, VerbatimParagraph, PodToMarkdownConverter, - RawPodParseError, } from "./pod"; // Used to return errors that are otherwise logged. 
-const podToMd = (fileContents: string): string | RawPodParseError | PodProcessingError => { +const podToMd = (fileContents: string): string | PodProcessingError => { const parser = new RawPodParser(); const processor = new PodProcessor(); const converter = new PodToMarkdownConverter(); let parseRes = parser.parse(fileContents); - if (parseRes.kind === "parseerror") { - return parseRes; - } - let processRes = processor.process(parseRes); if (processRes.kind === "processingerror") { @@ -2250,13 +2245,9 @@ describe("pod lists to markdown lists", () => { const processor = new PodProcessor(); const converter = new PodToMarkdownConverter(); - const podToMd = (fileContents: string): string | RawPodParseError | PodProcessingError => { + const podToMd = (fileContents: string): string | PodProcessingError => { let parseRes = parser.parse(fileContents); - if (parseRes.kind === "parseerror") { - return parseRes; - } - let processRes = processor.process(parseRes); if (processRes.kind === "processingerror") { diff --git a/server/src/pod.ts b/server/src/pod.ts index bbb61cb..c66138b 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -5,13 +5,7 @@ import { isFile } from "./utils"; // Error types -export type PodParseError = RawPodParseError | PodProcessingError; - -export interface RawPodParseError { - kind: "parseerror"; - message: string; - lineNo: number; -} +export type PodParseError = PodProcessingError; export interface PodProcessingError { kind: "processingerror"; @@ -329,7 +323,7 @@ export class RawPodParser { * POD content that hasn't been processed and checked for validity yet. * This is done via the {@link PodProcessor}. 
*/ - parse(fileContents: string): RawPodDocument | RawPodParseError { + parse(fileContents: string): RawPodDocument { // Reset state this.#lineIter = new LinesIterator(fileContents); this.#currentBlock = undefined; @@ -378,14 +372,13 @@ export class RawPodParser { // other command paragraphs let paraResult = this.#tryParseCommand(line); - if (paraResult.kind === "parseerror") { - return paraResult; - } - // no need to skip to an empty line here, as that is handled for // each paragraph in tryParseCommand - this.#currentBlock.paragraphs.push(paraResult); + if (paraResult) { + this.#currentBlock.paragraphs.push(paraResult); + } + continue; } @@ -485,7 +478,7 @@ export class RawPodParser { /** Tries to parse a command paragraph. * The passed `line` is expected to have matched `/^=[a-zA-Z]/` beforehand. */ - #tryParseCommand(line: string): PodParagraph | RawPodParseError { + #tryParseCommand(line: string): PodParagraph | undefined { line = line.trimEnd(); const lineNo = this.#lineIter.currentLineNo(); @@ -669,13 +662,7 @@ export class RawPodParser { ) ][0]; if (matchResult !== undefined) { - if (matchResult.groups?.formatname === undefined) { - return { - kind: "parseerror", - lineNo: lineNo, - message: `"=begin" command at line ${lineNo} does not contain any format name`, - }; - } + let formatname = matchResult.groups?.formatname ?? 
""; let parameter = matchResult.groups?.parameter || ""; parameter = this.#appendNextLineUntilEmptyLine(parameter).trim(); @@ -683,7 +670,7 @@ export class RawPodParser { let para: BeginParagraph = { kind: "begin", lineNo: lineNo, - formatname: matchResult.groups?.formatname?.trim() as string, + formatname: formatname.trim(), parameter: parameter, }; @@ -693,20 +680,14 @@ export class RawPodParser { // =end matchResult = [...line.matchAll(/^=end(\s+(?:?[-a-zA-Z0-9_]+))?/g)][0]; if (matchResult !== undefined) { - if (matchResult.groups?.formatname === undefined) { - return { - kind: "parseerror", - lineNo: lineNo, - message: `"=end" command at line ${lineNo} does not contain any format name`, - }; - } + let formatname = matchResult.groups?.formatname ?? ""; this.#skipUntilEmptyLine(); let para: EndParagraph = { kind: "end", lineNo: lineNo, - formatname: matchResult.groups?.formatname?.trim() as string, + formatname: formatname.trim(), }; return para; @@ -717,15 +698,7 @@ export class RawPodParser { ...line.matchAll(/^=for(\s+(?:?[-a-zA-Z0-9_]+)(\s+(?.*))?)?/g) ][0]; if (matchResult !== undefined) { - const formatname = matchResult.groups?.formatname; - - if (formatname === undefined) { - return { - kind: "parseerror", - lineNo: lineNo, - message: `"=for" command at line ${lineNo} does not contain any format name`, - }; - } + const formatname = matchResult.groups?.formatname ?? ""; let contents = (matchResult.groups?.contents || "").trim(); @@ -742,7 +715,7 @@ export class RawPodParser { let para: ForParagraph = { kind: "for", lineNo: lineNo, - formatname: formatname, + formatname: formatname.trim(), lines: lines, }; @@ -764,12 +737,6 @@ export class RawPodParser { return para; } - - return { - kind: "parseerror", - lineNo: lineNo, - message: `failed to parse command from line ${lineNo}: "${line}" is not recognized as command paragraph`, - }; } /** Parses a verbatim paragraph. 
@@ -2055,11 +2022,6 @@ export async function getPod( let parser = new RawPodParser(); let rawPodDocResult = parser.parse(fileContents); - if (rawPodDocResult.kind === "parseerror") { - // TODO: log error? --> needs access to settings for nLog - return; - } - let processor = new PodProcessor(); let podDocResult = processor.process(rawPodDocResult); From 348c0478924775fad0a558f1a16c702b3c09a42a Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Tue, 7 Jan 2025 14:59:43 +0100 Subject: [PATCH 17/20] server: pod: remove error handling of PodProcessor Similar to the previous commit, get rid of the error handling in the PodProcessor class in order to make the overall POD-to-Markdown conversion process more lax (as suggested in PR #142 [1]). This has a few consequences: - =head paragraphs are now allowed within =over ... =back blocks (even though it is suggested not to put them there [2]). - If there are any disallowed command paragraphs inside =begin ... =end, ignore them instead of returning an error. - The formatname parameter of =begin and =end paragraphs does not have to match anymore. - Should there be any =back or =end paragraphs without a matching =over or =begin, ignore them instead of returning an error. - Even though data paragraphs are constructed by the processor itself and should never occur naturally inside the raw pod blocks being processed, handle them anyways -- this means that they get ignored everywhere, except inside =begin ... =end blocks, where they're just added to the processed block. This is a minor detail, but still worth pointing out. [1]: https://github.com/bscan/PerlNavigator/pull/142#issuecomment-2568327657 [2]: https://perldoc.perl.org/perlpod#=over-indentlevel Signed-off-by: Max R. 
Carrara --- server/src/pod.test.ts | 77 +++++++++++----- server/src/pod.ts | 205 +++++++++-------------------------------- 2 files changed, 101 insertions(+), 181 deletions(-) diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts index b6f0f5a..440b8b0 100644 --- a/server/src/pod.test.ts +++ b/server/src/pod.test.ts @@ -5,27 +5,20 @@ import { PodDocument, PodParagraph, RawPodParser, - PodProcessingError, PodProcessor, RawPodDocument, VerbatimParagraph, PodToMarkdownConverter, } from "./pod"; -// Used to return errors that are otherwise logged. -const podToMd = (fileContents: string): string | PodProcessingError => { +const podToMd = (fileContents: string): string => { const parser = new RawPodParser(); const processor = new PodProcessor(); const converter = new PodToMarkdownConverter(); let parseRes = parser.parse(fileContents); - let processRes = processor.process(parseRes); - if (processRes.kind === "processingerror") { - return processRes; - } - return converter.convert(processRes); }; @@ -894,9 +887,6 @@ describe("complex POD processing cases", () => { const parser = new RawPodParser(); const processor = new PodProcessor(); - // We forcibly omit the `message` property here so the object matcher ignores it. - const processingError = { kind: "processingerror" } as PodProcessingError; - // Spec requires matching =end, but we choose to tolerate this test("unclosed data block", () => { const fileContents = `\ @@ -1182,16 +1172,43 @@ Ordinary. Verbatim. -=head1 SOME COOL TITLE THAT SHOULDN'T BE HERE +=head1 SOME COOL TITLE THAT GETS IGNORED =end foo =cut `; + const paragraphs: Array = [ + { + kind: "datablock", + formatname: "foo", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: [ + "Ordinary.", + "", + " Verbatim." 
+ ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; const result = parser.parse(fileContents); - expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); }); test("normal data block with command paragraph", () => { @@ -1255,16 +1272,39 @@ Ordinary. =over 42 -=head1 +=head1 I GET TOLERATED =back =cut `; + const paragraphs: Array = [ + { + kind: "overblock", + level: 42, + paragraphs: [ + { + kind: "head", + level: HeaderLevel.One, + contents: "I GET TOLERATED", + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; const result = parser.parse(fileContents); - expect(processor.process(result as RawPodDocument)).toMatchObject(processingError); + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); }); test("double-nested over block", () => { @@ -2245,15 +2285,10 @@ describe("pod lists to markdown lists", () => { const processor = new PodProcessor(); const converter = new PodToMarkdownConverter(); - const podToMd = (fileContents: string): string | PodProcessingError => { + const podToMd = (fileContents: string): string => { let parseRes = parser.parse(fileContents); - let processRes = processor.process(parseRes); - if (processRes.kind === "processingerror") { - return processRes; - } - return converter.convert(processRes); }; diff --git a/server/src/pod.ts b/server/src/pod.ts index c66138b..a1ac3b3 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -3,15 +3,6 @@ import { PerlDocument, PerlElem, PerlSymbolKind } from "./types"; import Uri from "vscode-uri"; import { isFile } from "./utils"; -// Error types - -export type PodParseError = PodProcessingError; - -export interface PodProcessingError { - kind: "processingerror"; - 
message: string; -} - /** A paragraph whose first line matches `^[ \t]`. * * May also be *inside* `=begin [formatname]` and `=end [formatname]` commands, @@ -206,6 +197,7 @@ export interface PodBlock { export type OverBlockContent = VerbatimParagraph | OrdinaryParagraph + | HeaderParagraph | UnordererdItemParagraph | OrderedItemParagraph | EncodingParagraph @@ -833,7 +825,7 @@ export class PodProcessor { * the POD specification as possible) and also merges certain paragraphs for * ease of use. */ - process(document: RawPodDocument): PodDocument | PodProcessingError { + process(document: RawPodDocument): PodDocument { // Reset state this.#blockIter = this.#makeBlockIter(document.blocks); this.#processedBlocks = []; @@ -844,10 +836,6 @@ export class PodProcessor { while (currentBlock) { const processedBlockResult = blockProcessor.process(currentBlock); - if (processedBlockResult.kind === "processingerror") { - return processedBlockResult; - } - this.#processedBlocks.push(processedBlockResult); currentBlock = this.#getNextBlock(); } @@ -892,7 +880,7 @@ class PodBlockProcessor { return value; } - process(block: RawPodBlock): PodBlock | PodProcessingError { + process(block: RawPodBlock): PodBlock { // Reset state this.#paragraphIter = this.#makeParagraphIter(block.paragraphs); this.#podBlock = { kind: "podblock", paragraphs: [] }; @@ -934,45 +922,18 @@ class PodBlockProcessor { case "head": this.#podBlock.paragraphs.push(para); break; - case "data": - return { - kind: "processingerror", - message: 'encountered unexpected data paragraph', - }; case "over": - let overBlockResult = this.#enterOverBlock(para); - - if (overBlockResult.kind === "processingerror") { - return overBlockResult; - } - - this.#podBlock.paragraphs.push(overBlockResult); + this.#podBlock.paragraphs.push(this.#enterOverBlock(para)); break; - case "back": - return { - kind: "processingerror", - message: "'=back' does not have matching '=over'", - }; case "begin": - let dataBlockResult = 
this.#enterDataBlock(para); - - if (dataBlockResult.kind === "processingerror") { - return dataBlockResult; - } - - this.#podBlock.paragraphs.push(dataBlockResult); + this.#podBlock.paragraphs.push(this.#enterDataBlock(para)); break; - case "end": - return { - kind: "processingerror", - message: `'=end ${para.formatname}' does not have matching '=begin ${para.formatname}'`, - }; case "for": - let forDataBlock = this.#buildDataBlockFromForPara(para); - - this.#podBlock.paragraphs.push(forDataBlock); - + this.#podBlock.paragraphs.push(this.#buildDataBlockFromForPara(para)); break; + case "data": // should not be possible to appear here, so ignore it + case "back": // doesn't have a matching =over, so ignore it + case "end": // doesn't have matching =begin, so ignore it case "encoding": // ignored case "unknown": // ignored break; @@ -986,7 +947,7 @@ class PodBlockProcessor { } // `level` must be non-zero. - #enterOverBlock(paragraph: OverParagraph): OverBlock | PodProcessingError { + #enterOverBlock(paragraph: OverParagraph): OverBlock { let overBlock: OverBlock = { kind: "overblock", lineNo: paragraph.lineNo, @@ -1025,51 +986,26 @@ class PodBlockProcessor { overBlock.paragraphs.push(para); break; + case "head": // technically not allowed by spec, but we tolerate it anyways case "ordinary": case "unordereditem": case "ordereditem": overBlock.paragraphs.push(para); break; - case "head": - return { - kind: "processingerror", - message: `encountered invalid paragraph in "=over ... =back" block: "=head${para.level} ${para.contents}"` - }; - case "data": - return { - kind: "processingerror", - message: 'encountered unexpected data paragraph in "=over ... 
=back" block', - }; case "over": - let nestedOverBlockResult = this.#enterOverBlock(para); - - if (nestedOverBlockResult.kind === "processingerror") { - return nestedOverBlockResult; - } - - overBlock.paragraphs.push(nestedOverBlockResult); + overBlock.paragraphs.push(this.#enterOverBlock(para)); break; case "back": isProcessingBlock = false; break; case "begin": - let nestedDataBlockResult = this.#enterDataBlock(para); - - if (nestedDataBlockResult.kind === "processingerror") { - return nestedDataBlockResult; - } - - overBlock.paragraphs.push(nestedDataBlockResult); + overBlock.paragraphs.push(this.#enterDataBlock(para)); break; - case "end": - return { - kind: "processingerror", - message: `'=end ${para.formatname}' does not have matching '=begin ${para.formatname}'`, - }; case "for": - let nestedForDataBlock = this.#buildDataBlockFromForPara(para); - - overBlock.paragraphs.push(nestedForDataBlock); + overBlock.paragraphs.push(this.#buildDataBlockFromForPara(para)); + break; + case "data": // should not be possible to appear here, so ignore it + case "end": // doesn't have matching =begin, so ignore it case "encoding": // ignored case "unknown": // ignored break; @@ -1082,7 +1018,7 @@ class PodBlockProcessor { return overBlock; } - #enterDataBlock(paragraph: BeginParagraph): DataBlock | NormalDataBlock | PodProcessingError { + #enterDataBlock(paragraph: BeginParagraph): DataBlock | NormalDataBlock { if (paragraph.formatname.startsWith(":")) { return this.#buildNormalDataBlock(paragraph); } else { @@ -1090,7 +1026,7 @@ class PodBlockProcessor { } } - #buildDataBlock(paragraph: BeginParagraph): DataBlock | PodProcessingError { + #buildDataBlock(paragraph: BeginParagraph): DataBlock { let dataBlock: DataBlock = { kind: "datablock", formatname: paragraph.formatname, @@ -1137,10 +1073,9 @@ class PodBlockProcessor { break; case "data": - return { - kind: "processingerror", - message: `pre-existing data paragraph in "=begin ${dataBlock.formatname} ... 
=end ${dataBlock.formatname}" block`, - }; + // Should not be appearing here, but since it's a data + // paragraph, just add it + dataBlock.paragraphs.push(para); case "encoding": case "unordereditem": case "ordereditem": @@ -1148,35 +1083,19 @@ class PodBlockProcessor { case "over": case "back": case "unknown": - return { - kind: "processingerror", - message: `unexpected command paragraph "${para.kind}" in "=begin ${dataBlock.formatname} ... =end ${dataBlock.formatname}" block`, - }; + // None of these paragraphs are allowed per the perlpodspec, + // so just ignore them + break; case "begin": - let nestedDataBlockResult = this.#enterDataBlock(para); - - if (nestedDataBlockResult.kind === "processingerror") { - return nestedDataBlockResult; - } - - dataBlock.paragraphs.push(nestedDataBlockResult); + dataBlock.paragraphs.push(this.#enterDataBlock(para)); break; case "end": - const [beginFmtName, endFmtName] = [dataBlock.formatname.trim(), para.formatname.trim()]; - - if (beginFmtName !== endFmtName) { - return { - kind: "processingerror", - message: `"=end ${endFmtName}" does not match "=begin ${beginFmtName}"`, - }; - } - + // Normally the formatname parameter of =begin and =end + // blocks must match, but we're staying fault-tolerant here isProcessingBlock = false; break; case "for": - let nestedForDataBlock = this.#buildDataBlockFromForPara(para); - - dataBlock.paragraphs.push(nestedForDataBlock); + dataBlock.paragraphs.push(this.#buildDataBlockFromForPara(para)); break; default: const _exhaustiveCheck: never = para; @@ -1187,7 +1106,7 @@ class PodBlockProcessor { return dataBlock; } - #buildNormalDataBlock(paragraph: BeginParagraph): NormalDataBlock | PodProcessingError { + #buildNormalDataBlock(paragraph: BeginParagraph): NormalDataBlock { let dataBlock: NormalDataBlock = { kind: "normaldatablock", formatname: paragraph.formatname, @@ -1232,51 +1151,22 @@ class PodBlockProcessor { case "head": dataBlock.paragraphs.push(para); break; - case "data": - return { - 
kind: "processingerror", - message: `unexpected data paragraph in "=begin ${dataBlock.formatname} ... =end ${dataBlock.formatname}" block`, - }; case "over": - let overBlockResult = this.#enterOverBlock(para); - - if (overBlockResult.kind === "processingerror") { - return overBlockResult; - } - - dataBlock.paragraphs.push(overBlockResult); + dataBlock.paragraphs.push(this.#enterOverBlock(para)); break; - case "back": - return { - kind: "processingerror", - message: "'=back' does not have matching '=over'", - }; case "begin": - let dataBlockResult = this.#enterDataBlock(para); - - if (dataBlockResult.kind === "processingerror") { - return dataBlockResult; - } - - dataBlock.paragraphs.push(dataBlockResult); + dataBlock.paragraphs.push(this.#enterDataBlock(para)); break; case "end": - const [beginFmtName, endFmtName] = [dataBlock.formatname.trim(), para.formatname.trim()]; - - if (beginFmtName !== endFmtName) { - return { - kind: "processingerror", - message: `"=end ${endFmtName}" does not match "=begin ${beginFmtName}"`, - }; - } - + // Normally the formatname parameter of =begin and =end + // blocks must match, but we're staying fault-tolerant here isProcessingBlock = false; break; case "for": - let nestedForDataBlock = this.#buildDataBlockFromForPara(para); - - dataBlock.paragraphs.push(nestedForDataBlock); + dataBlock.paragraphs.push(this.#buildDataBlockFromForPara(para)); break; + case "data": // should not be possible to appear here, so ignore it + case "back": // doesn't have a matching =over, so ignore it case "encoding": // ignored case "unknown": // ignored break; @@ -2023,31 +1913,26 @@ export async function getPod( let rawPodDocResult = parser.parse(fileContents); let processor = new PodProcessor(); - let podDocResult = processor.process(rawPodDocResult); - - if (podDocResult.kind === "processingerror") { - // TODO: log error? 
--> needs access to settings for nLog - return; - } + let podDoc = processor.process(rawPodDocResult); - let podDoc: PodDocument | undefined = podDocResult; + let podDocRes: PodDocument | undefined; if (symbolName) { - podDoc = lookupSymbolInPod(symbolName, podDocResult); + podDocRes = lookupSymbolInPod(symbolName, podDoc); - if (podDoc) { - formatPodDocForSymbol(podDoc); + if (podDocRes) { + formatPodDocForSymbol(podDocRes); } } - if (!podDoc) { + if (!podDocRes) { return; } - formatPodDoc(podDoc); + formatPodDoc(podDocRes); let converter = new PodToMarkdownConverter(); - let markdown = converter.convert(podDoc); + let markdown = converter.convert(podDocRes); if (!markdown) { return; From faf59f3e0762acbd512945fd518c38f0c2387f35 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Tue, 7 Jan 2025 15:45:59 +0100 Subject: [PATCH 18/20] server: pod: use 2 instead of 8 spaces to represent tabs .. as suggested in PR #142 [1]. [1]: https://github.com/bscan/PerlNavigator/pull/142#issuecomment-2568327657 Signed-off-by: Max R. Carrara --- server/src/pod.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/pod.ts b/server/src/pod.ts index a1ac3b3..259114a 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -1363,14 +1363,14 @@ export class PodToMarkdownConverter { #convertVerbatimPara(verbatimPara: VerbatimParagraph): Array { return [ "```", - ...verbatimPara.lines.map((line) => tabsToSpaces(line, 8)), + ...verbatimPara.lines.map((line) => tabsToSpaces(line, 2)), "```", ]; } #convertOrdinaryPara(ordinaryPara: OrdinaryParagraph): Array { return ordinaryPara.lines - .map((line) => tabsToSpaces(line, 8)) + .map((line) => tabsToSpaces(line, 2)) .map(processInlineElements); } From 1c27bd422cf1d15e34293f7dc45e830a56f46b28 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Tue, 7 Jan 2025 15:53:25 +0100 Subject: [PATCH 19/20] server: pod: use Perl syntax highlighting for verbatim paragraphs Signed-off-by: Max R. 
Carrara --- server/src/pod.test.ts | 24 ++++++++++++------------ server/src/pod.ts | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts index 440b8b0..604116d 100644 --- a/server/src/pod.test.ts +++ b/server/src/pod.test.ts @@ -2052,7 +2052,7 @@ mattis lectus dictum ultricies. `; const expected = `\ -\`\`\` +\`\`\`perl Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. @@ -2087,7 +2087,7 @@ mattis lectus dictum ultricies. =cut`; const expected = `\ -\`\`\` +\`\`\`perl Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. @@ -2134,7 +2134,7 @@ mattis lectus dictum ultricies. =cut`; const expected = `\ -\`\`\` +\`\`\`perl async function getWorkspaceFoldersSafe(): Promise { try { const workspaceFolders = await connection.workspace.getWorkspaceFolders(); @@ -2196,13 +2196,13 @@ Aliquam erat: const expected = `\ Lorem ipsum dolor sit amet: -\`\`\` +\`\`\`perl Consectetur adipiscing elit. \`\`\` Integer purus nisi: -\`\`\` +\`\`\`perl Egestas et imperdiet sit amet, interdum ut nisl. Sed fringilla placerat nulla, et viverra purus ultricies sit amet. @@ -2213,7 +2213,7 @@ Feugiat ipsum nec. Aliquam erat: -\`\`\` +\`\`\`perl Maecenas dapibus arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac faucibus est elementum. @@ -2660,7 +2660,7 @@ baz The POD spec only allows certain command paragraphs to appear in an over-back block. - But we can nest things, because it all handles the same anyways. -- \`\`\` +- \`\`\`perl Verbatim paragraphs are put into a neat code block in markdown @@ -3038,7 +3038,7 @@ This stuff in here gets treated as regular POD. 42. Because this isn't handled in any special manner. - It really isn't. -\`\`\` +\`\`\`perl So yeah. The block above doesn't exist in Markdown at all. You won't even know it's there. 
@@ -3186,7 +3186,7 @@ And B<<<<< this >>>>> one too. **This paragraph is in bold.** -\`\`\` +\`\`\`perl B \`\`\` @@ -3228,7 +3228,7 @@ And I<<<<< this >>>>> one too. *This paragraph is in italics.* -\`\`\` +\`\`\`perl I \`\`\` @@ -3270,7 +3270,7 @@ And I>> one too. ***This paragraph is in bold italics.*** -\`\`\` +\`\`\`perl B> \`\`\` @@ -3312,7 +3312,7 @@ C C C \`This paragraph is inline code.\` -\`\`\` +\`\`\`perl C \`\`\` diff --git a/server/src/pod.ts b/server/src/pod.ts index 259114a..34f5296 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -1362,7 +1362,7 @@ export class PodToMarkdownConverter { #convertVerbatimPara(verbatimPara: VerbatimParagraph): Array { return [ - "```", + "```perl", ...verbatimPara.lines.map((line) => tabsToSpaces(line, 2)), "```", ]; From f18a935f16a3cb2beb82f32f7bd57ed4e98fb572 Mon Sep 17 00:00:00 2001 From: "Max R. Carrara" Date: Tue, 7 Jan 2025 17:11:11 +0100 Subject: [PATCH 20/20] server: pod: fix no POD being returned if no symbol name was set Due to a tiny logic error, no docstrings would be rendered for modules like File::Basename or File::Find::Rule. This would happen if no symbol name was set (which means that just the entire module's POD should be returned). Signed-off-by: Max R. Carrara --- server/src/pod.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/pod.ts b/server/src/pod.ts index 34f5296..210482e 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -1915,7 +1915,7 @@ export async function getPod( let processor = new PodProcessor(); let podDoc = processor.process(rawPodDocResult); - let podDocRes: PodDocument | undefined; + let podDocRes: PodDocument | undefined = podDoc; if (symbolName) { podDocRes = lookupSymbolInPod(symbolName, podDoc);