diff --git a/server/jest.config.ts b/server/jest.config.ts new file mode 100644 index 0000000..3d4f68e --- /dev/null +++ b/server/jest.config.ts @@ -0,0 +1,17 @@ +import type { JestConfigWithTsJest } from 'ts-jest' + +const jestConfig: JestConfigWithTsJest = { + // [...] + transform: { + // '^.+\\.[tj]sx?$' to process ts,js,tsx,jsx with `ts-jest` + // '^.+\\.m?[tj]sx?$' to process ts,js,tsx,jsx,mts,mjs,mtsx,mjsx with `ts-jest` + '^.+\\.tsx?$': [ + 'ts-jest', + { + // ts-jest configuration goes here + }, + ], + }, +} + +export default jestConfig; diff --git a/server/package.json b/server/package.json index 70c8fd9..81413bb 100644 --- a/server/package.json +++ b/server/package.json @@ -20,12 +20,20 @@ "vscode-textmate": "^9.0.0", "vscode-oniguruma": "^2.0.1" }, - "scripts": {}, + "scripts": { + "test": "jest" + }, "main": "./src/dist/serverMain.js", "bin": { "perlnavigator": "./bin/perlnavigator" }, - "devDependencies": {}, + "devDependencies": { + "@types/jest": "^29.5.12", + "jest": "^29.7.0", + "ts-jest": "^29.2.4", + "ts-node": "^10.9.2", + "typescript": "^5.5.4" + }, "keywords": [ "perl", "lsp", diff --git a/server/src/pod.test.ts b/server/src/pod.test.ts new file mode 100644 index 0000000..604116d --- /dev/null +++ b/server/src/pod.test.ts @@ -0,0 +1,3328 @@ +import { + HeaderLevel, + OrdinaryParagraph, + PodBlockContent, + PodDocument, + PodParagraph, + RawPodParser, + PodProcessor, + RawPodDocument, + VerbatimParagraph, + PodToMarkdownConverter, +} from "./pod"; + +const podToMd = (fileContents: string): string => { + const parser = new RawPodParser(); + const processor = new PodProcessor(); + const converter = new PodToMarkdownConverter(); + + let parseRes = parser.parse(fileContents); + let processRes = processor.process(parseRes); + + return converter.convert(processRes); +}; + + +describe("basic parser and processor tests", () => { + const parser = new RawPodParser(); + const processor = new PodProcessor(); + + test("empty file returns empty document", () 
=> { + const fileContents = ""; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("single =pod ... =cut region", () => { + const fileContents = `\ +=pod + +=cut +`; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("repeated =pod ... =cut regions with Perl", () => { + const fileContents = `\ +=pod + +=cut + +# This is a Perl comment and doesn't get parsed. +sub foo { + my ($bar, $baz) = @_; + + die "baz didn't bar" if !defined($baz->($bar)); + + return "foo $bar"; +} + +=pod + +=cut + +# =pod +# +# This should not get parsed +# +# =cut + +=pod And this here +gets ignored. + +=cut This here +as well. 
+`; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + { + kind: "rawpodblock", + paragraphs: [], + }, + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [], + }, + { + kind: "podblock", + paragraphs: [], + }, + { + kind: "podblock", + paragraphs: [], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("repeated =pod command", () => { + const fileContents = `\ +=pod + +=pod + +=cut +`; + + const expected: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; + + expect(parser.parse(fileContents)).toMatchObject(expected); + }); + + test("repeated =cut command", () => { + const fileContents = `\ +=pod + +=cut + +=cut +`; + + const expected: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; + + expect(parser.parse(fileContents)).toMatchObject(expected); + }); + + test("unclosed pod block", () => { + const fileContents = `\ +=pod`; + + const expected: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [], + }, + ], + }; + + expect(parser.parse(fileContents)).toMatchObject(expected); + }); + + test("document with ordinary paragraph", () => { + const fileContents = `\ +=pod + +This is an ordinary paragraph. 
+ +=cut +`; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [ + { + kind: "ordinary", + lines: ["This is an ordinary paragraph."], + }, + ], + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "ordinary", + lines: ["This is an ordinary paragraph."], + }, + ], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("document with verbatim paragraph", () => { + const fileContents = `\ +=pod + + This is a verbatim paragraph. Notice the space. + +=cut +`; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: [ + { + kind: "verbatim", + lines: [" This is a verbatim paragraph. Notice the space."], + }, + ], + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "verbatim", + lines: [" This is a verbatim paragraph. Notice the space."], + }, + ], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("document with ordinary and verbatim paragraphs", () => { + const fileContents = `\ +=pod + +This is an ordinary paragraph. It spans a single line. + + This is a verbatim paragraph. It spans a single line. + +This is an ordinary paragraph. +It spans two... +\tNo, three lines! + + This is a verbatim paragraph. +It spans two... +\tNo, three lines! +Actually, four. Sorry. + +=cut +`; + + const paragraphs: Array = [ + { + kind: "ordinary", + lines: [ + "This is an ordinary paragraph. 
It spans a single line.", + ], + }, + { + kind: "verbatim", + lines: [ + " This is a verbatim paragraph. It spans a single line.", + ], + }, + { + kind: "ordinary", + lines: [ + "This is an ordinary paragraph.", + "It spans two...", + "\tNo, three lines!", + ], + }, + { + kind: "verbatim", + lines: [ + " This is a verbatim paragraph.", + "It spans two...", + "\tNo, three lines!", + "Actually, four. Sorry.", + ], + }, + ]; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: paragraphs, + }, + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toBeDefined(); + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); + + test("document with multiple regions and various paragraphs", () => { + const fileContents = `\ +=pod + +=head1 HEAD ONE + + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + + + +=head2 HEAD TWO + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + +=head3 HEAD +THREE + + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=head4 HEAD +F +O +U +R + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=head5 HEAD FIVE + +=over + +=item * + + + + + +Lorem Ipsum. + +=item * + +Dolor sit amet. + +=item +* + +Consectetur adipiscing elit. + +=back + +=head6 HEAD SIX + +=over 3.5 + +=back + +=over 42 + + +=item Morbi ut iaculis orci. Praesent +vehicula risus sed leo commodo, sit amet +laoreet dolor consectetur. + + +=back + +=over 0 + +=back + + + +=head7 UNKNOWN COMMAND PARAGRAPH + + + +=cut + +# This is Perl and is ignored by the parser. 
+sub foobar { + my ($foo, $bar) = @_; + + return "$foo $bar"; +} + + +=pod + +=encoding utf8 + +=begin foo + + +=end foo + +=begin bar + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + + + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=end bar + +=begin :baz some parameter stuff + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=end :baz + +=for comment This is a comment. + +=for comment This is +a +multiline +comment. + +=cut +`; + + const firstParagraphsRaw: Array = [ + { + kind: "head", + level: HeaderLevel.One, + contents: "HEAD ONE", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Two, + contents: "HEAD TWO", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Three, + contents: "HEAD THREE", + }, + { + kind: "verbatim", + lines: [" Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Four, + contents: "HEAD F O U R", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Five, + contents: "HEAD FIVE", + }, + { + kind: "over", + level: 4, + }, + { + kind: "unordereditem", + lines: ["Lorem Ipsum."], + }, + { + kind: "unordereditem", + lines: ["Dolor sit amet."], + }, + { + kind: "unordereditem", + lines: ["Consectetur adipiscing elit."], + }, + { + kind: "back", + }, + { + kind: "head", + level: HeaderLevel.Six, + contents: "HEAD SIX", + }, + { + kind: "over", + level: 3.5, + }, + { + kind: "back", + }, + { + kind: "over", + level: 42, + }, + { + kind: "unordereditem", + lines: [ + "Morbi ut iaculis orci. 
Praesent", + "vehicula risus sed leo commodo, sit amet", + "laoreet dolor consectetur.", + ], + }, + { + kind: "back", + }, + { + kind: "over", + level: 4, + }, + { + kind: "back", + }, + { + kind: "unknown", + cmd: "head7", + contents: "UNKNOWN COMMAND PARAGRAPH", + }, + ]; + + const secondParagraphsRaw: Array = [ + { + kind: "encoding", + name: "utf8", + }, + { + kind: "begin", + formatname: "foo", + parameter: "", + }, + { + kind: "end", + formatname: "foo", + }, + { + kind: "begin", + formatname: "bar", + parameter: "", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "end", + formatname: "bar", + }, + { + kind: "begin", + formatname: ":baz", + parameter: "some parameter stuff", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "end", + formatname: ":baz", + }, + { + kind: "for", + formatname: "comment", + lines: ["This is a comment."], + }, + { + kind: "for", + formatname: "comment", + lines: [ + "This is", + "a", + "multiline", + "comment.", + ], + }, + ]; + + const firstParagraphsProcessed: Array = [ + { + kind: "head", + level: HeaderLevel.One, + contents: "HEAD ONE", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Two, + contents: "HEAD TWO", + }, + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Three, + contents: "HEAD THREE", + }, + { + kind: "verbatim", + lines: [" Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Four, + contents: "HEAD F O U R", + }, + { + kind: "ordinary", + 
lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "head", + level: HeaderLevel.Five, + contents: "HEAD FIVE", + }, + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "unordereditem", + lines: ["Lorem Ipsum."], + }, + { + kind: "unordereditem", + lines: ["Dolor sit amet."], + }, + { + kind: "unordereditem", + lines: ["Consectetur adipiscing elit."], + }, + ], + }, + { + kind: "head", + level: HeaderLevel.Six, + contents: "HEAD SIX", + }, + { + kind: "overblock", + level: 3.5, + paragraphs: [], + }, + { + kind: "overblock", + level: 42, + paragraphs: [ + { + kind: "unordereditem", + lines: [ + "Morbi ut iaculis orci. Praesent", + "vehicula risus sed leo commodo, sit amet", + "laoreet dolor consectetur.", + ], + }, + ], + }, + { + kind: "overblock", + level: 4, + paragraphs: [], + }, + // NOTE: unknown command paragraph is ignored and therefore not included here + ]; + + const secondParagraphsProcessed: Array = [ + // NOTE: encoding command paragraph is ignored and therefore not included here + { + kind: "datablock", + formatname: "foo", + parameter: "", + paragraphs: [], + }, + { + kind: "datablock", + formatname: "bar", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: [ + "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + "", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + "", + "Lorem ipsum dolor sit amet, consectetur adipiscing elit.", + ], + }, + ], + }, + { + kind: "normaldatablock", + formatname: ":baz", + parameter: "some parameter stuff", + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + ], + }, + { + kind: "datablock", + formatname: "comment", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: ["This is a comment."], + }, + ], + }, + { + kind: "datablock", + formatname: "comment", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: [ + "This is", + "a", + "multiline", + "comment.", 
+ ], + }, + ], + }, + ]; + + const expectedRaw: RawPodDocument = { + kind: "rawpoddocument", + blocks: [ + { + kind: "rawpodblock", + paragraphs: firstParagraphsRaw, + }, + { + kind: "rawpodblock", + paragraphs: secondParagraphsRaw, + } + ], + }; + + const expectedProcessed: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: firstParagraphsProcessed, + }, + { + kind: "podblock", + paragraphs: secondParagraphsProcessed, + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(result).toMatchObject(expectedRaw); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expectedProcessed); + }); +}); + + +describe("complex POD processing cases", () => { + const parser = new RawPodParser(); + const processor = new PodProcessor(); + + // Spec requires matching =end, but we choose to tolerate this + test("unclosed data block", () => { + const fileContents = `\ +=pod + +=begin foo + +=cut +`; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "datablock", + formatname: "foo", + paragraphs: [], + parameter: "", + }, + ], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + // Spec requires matching =end, but we choose to tolerate this + test("unclosed normal data block", () => { + const fileContents = `\ +=pod + +=begin :foo + +=cut +`; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":foo", + paragraphs: [], + parameter: "", + }, + ], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + // Spec requires matching =back, but we choose to tolerate this + test("unclosed over block", () => { + const fileContents = `\ +=pod + +=over 42 + 
+=cut +`; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "overblock", + level: 42, + paragraphs: [], + }, + ], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("over blocks with invalid indent levels", () => { + const fileContents = `\ +=pod + +=over 0 + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=back + +=over -1 + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=back + +=cut +`; + + const paragraphs: Array = [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + ], + }, + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + + }) + + test("data block with ordinary and verbatim paragraphs", () => { + const fileContents = `\ +=pod + +=begin foo bar + +Ordinary. + + Verbatim. + +Ordinary. +But longer. + +\tVerbatim. +But longer. 
+ +=end foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "datablock", + formatname: "foo", + parameter: "bar", + paragraphs: [ + { + kind: "data", + lines: [ + "Ordinary.", + "", + " Verbatim.", + "", + "Ordinary.", + "But longer.", + "", + "\tVerbatim.", + "But longer.", + ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("normal data block with ordinary and verbatim paragraphs", () => { + const fileContents = `\ +=pod + +=begin :foo bar + +Ordinary. + + Verbatim. + +Ordinary. +But longer. + +\tVerbatim. +But longer. + +=end :foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "normaldatablock", + formatname: ":foo", + parameter: "bar", + paragraphs: [ + { + kind: "ordinary", + lines: ["Ordinary."], + }, + { + kind: "verbatim", + lines: [" Verbatim."], + }, + { + kind: "ordinary", + lines: ["Ordinary.", "But longer."], + }, + { + kind: "verbatim", + lines: ["\tVerbatim.", "But longer."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("data block with command paragraph", () => { + const fileContents = `\ +=pod + +=begin foo + +Ordinary. + + Verbatim. + +=head1 SOME COOL TITLE THAT GETS IGNORED + +=end foo + +=cut +`; + const paragraphs: Array = [ + { + kind: "datablock", + formatname: "foo", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: [ + "Ordinary.", + "", + " Verbatim." 
+ ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("normal data block with command paragraph", () => { + const fileContents = `\ +=pod + +=begin :foo + +Ordinary. + + Verbatim. + +=head1 SOME COOL TITLE THAT CAN ACTUALLY BE HERE + +=end :foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "normaldatablock", + formatname: ":foo", + parameter: "", + paragraphs: [ + { + kind: "ordinary", + lines: ["Ordinary."], + }, + { + kind: "verbatim", + lines: [" Verbatim."], + }, + { + kind: "head", + level: HeaderLevel.One, + contents: "SOME COOL TITLE THAT CAN ACTUALLY BE HERE", + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("over block with header command paragraph", () => { + const fileContents = `\ +=pod + +=over 42 + +=head1 I GET TOLERATED + +=back + +=cut +`; + const paragraphs: Array = [ + { + kind: "overblock", + level: 42, + paragraphs: [ + { + kind: "head", + level: HeaderLevel.One, + contents: "I GET TOLERATED", + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("double-nested over block", () => { + const fileContents = `\ +=pod + +=over + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=over + +=item * + +Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor. 
+ +=item * + +Pellentesque elementum luctus urna, et dapibus est faucibus eu. + +=back + +Mauris ut arcu ipsum. + +=back + +=cut +`; + + const paragraphs: Array = [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "unordereditem", + lines: ["Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor."], + }, + { + kind: "unordereditem", + lines: ["Pellentesque elementum luctus urna, et dapibus est faucibus eu."], + }, + ], + }, + { + kind: "ordinary", + lines: ["Mauris ut arcu ipsum."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("deeply nested over block", () => { + const fileContents = `\ +=pod + +=over + +=over + +=over + +=over + +=over + +=over + +=over + +=over + +=over + +=over + +I know this looks weird, but this is still valid POD. 
+ +=back + +=back + +=back + +=back + +=back + +=back + +=back + +=back + +=back + +=back + +=cut +`; + + const paragraphs: Array = [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "overblock", + level: 4, + paragraphs: [ + { + kind: "ordinary", + lines: ["I know this looks weird, but this is still valid POD."], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("double-nested data block", () => { + const fileContents = `\ +=pod + +=begin foo + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=begin bar + +Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor. + +Pellentesque elementum luctus urna, et dapibus est faucibus eu. + +=end bar + +Mauris ut arcu ipsum. 
+ +=end foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "datablock", + formatname: "foo", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "datablock", + formatname: "bar", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: [ + "Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor.", + "", + "Pellentesque elementum luctus urna, et dapibus est faucibus eu.", + ], + }, + ], + }, + { + kind: "data", + lines: ["Mauris ut arcu ipsum."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("deeply nested data block", () => { + const fileContents = `\ +=pod + +=begin one + +=begin two + +=begin three + +=begin four + +=begin five + +=begin six + +=begin seven + +=begin eight + +=begin nine + +=begin ten + +I know this looks weird, but this is still valid POD. 
+ +=end ten + +=end nine + +=end eight + +=end seven + +=end six + +=end five + +=end four + +=end three + +=end two + +=end one + +=cut +`; + + const paragraphs: Array = [ + { + kind: "datablock", + formatname: "one", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "two", + parameter: "", + paragraphs: [ + + { + kind: "datablock", + formatname: "three", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "four", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "five", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "six", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "seven", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "eight", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "nine", + parameter: "", + paragraphs: [ + { + kind: "datablock", + formatname: "ten", + parameter: "", + paragraphs: [ + { + kind: "data", + lines: ["I know this looks weird, but this is still valid POD."], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("double-nested normal data block", () => { + const fileContents = `\ +=pod + +=begin :foo + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +=begin :bar + +Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor. + +Pellentesque elementum luctus urna, et dapibus est faucibus eu. + +=end :bar + +Mauris ut arcu ipsum. 
+ +=end :foo + +=cut +`; + + const paragraphs: Array = [ + { + kind: "normaldatablock", + formatname: ":foo", + parameter: "", + paragraphs: [ + { + kind: "ordinary", + lines: ["Lorem ipsum dolor sit amet, consectetur adipiscing elit."], + }, + { + kind: "normaldatablock", + formatname: ":bar", + parameter: "", + paragraphs: [ + { + kind: "ordinary", + lines: ["Sed consequat, neque eu aliquam porttitor, tellus augue faucibus quam, a ornare neque dolor vitae dolor."], + }, + { + kind: "ordinary", + lines: ["Pellentesque elementum luctus urna, et dapibus est faucibus eu."], + }, + ], + }, + { + kind: "ordinary", + lines: ["Mauris ut arcu ipsum."], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("deeply nested normal data block", () => { + const fileContents = `\ +=pod + +=begin :one + +=begin :two + +=begin :three + +=begin :four + +=begin :five + +=begin :six + +=begin :seven + +=begin :eight + +=begin :nine + +=begin :ten + +I know this looks weird, but this is still valid POD. 
+ +=end :ten + +=end :nine + +=end :eight + +=end :seven + +=end :six + +=end :five + +=end :four + +=end :three + +=end :two + +=end :one + +=cut +`; + + const paragraphs: Array = [ + { + kind: "normaldatablock", + formatname: ":one", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":two", + parameter: "", + paragraphs: [ + + { + kind: "normaldatablock", + formatname: ":three", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":four", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":five", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":six", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":seven", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":eight", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":nine", + parameter: "", + paragraphs: [ + { + kind: "normaldatablock", + formatname: ":ten", + parameter: "", + paragraphs: [ + { + kind: "ordinary", + lines: ["I know this looks weird, but this is still valid POD."], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + ]; + + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: paragraphs, + } + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); + + test("=for command without content", () => { + const fileContents = `\ +=pod + +=for comment + +=cut +`; + const expected: PodDocument = { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: [ + { + kind: "datablock", + formatname: "comment", + parameter: "", + paragraphs: [], + }, + ], + }, + ], + }; + + const result = parser.parse(fileContents); + + expect(processor.process(result as RawPodDocument)).toMatchObject(expected); + }); +}); + 
+describe("pod to markdown conversion tests", () => { + test("no pod block results in no markdown", () => { + const fileContents = `\ +# This isn't getting parsed. +sub foobar : prototype($) { + my ($baz) = @_; + + return "baz: $baz"; +} +`; + const expected = ""; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("empty pod block results in no markdown", () => { + const fileContents = `\ +=pod + +=cut +`; + + const expected = ""; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("ordinary paragraphs to markdown paragraphs", () => { + const fileContents = `\ +=pod + +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. +Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + +Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus +arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac +faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat +mattis lectus dictum ultricies. + +=cut +`; + + const expected = `\ +Lorem ipsum dolor sit amet, consectetur adipiscing elit. + +Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. +Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + +Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus +arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac +faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat +mattis lectus dictum ultricies. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("consecutive verbatim paragraphs to single markdown code block", () => { + const fileContents = `\ +=pod + + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. 
+Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + + Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus +arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac +faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat +mattis lectus dictum ultricies. + +=cut +`; + + const expected = `\ +\`\`\`perl + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. +Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + + Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus +arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac +faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat +mattis lectus dictum ultricies. +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("consecutive verbatim paragraphs with indentation to single markdown block", () => { + const fileContents = `\ +=pod + + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. + Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + + Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus + arcu odio, ac dictum mauris cursus quis. Donec facilisis ex at nisi dictum, ac + faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat + mattis lectus dictum ultricies. + +=cut`; + + const expected = `\ +\`\`\`perl + Lorem ipsum dolor sit amet, consectetur adipiscing elit. + + Integer purus nisi, egestas et imperdiet sit amet, interdum ut nisl. + Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + + Vestibulum vel diam venenatis, feugiat ipsum nec, aliquam erat. Maecenas dapibus + arcu odio, ac dictum mauris cursus quis. 
Donec facilisis ex at nisi dictum, ac + faucibus est elementum. Mauris sit amet pretium lacus. Nunc sollicitudin erat + mattis lectus dictum ultricies. +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("consecutive verbatim paragraphs with deep indentation to single markdown block", () => { + const fileContents = `\ +=pod + + async function getWorkspaceFoldersSafe(): Promise { + try { + const workspaceFolders = await connection.workspace.getWorkspaceFolders(); + if (!workspaceFolders) { + return []; + } else { + return workspaceFolders; + } + } catch (error) { + return []; + } + } + + function sendDiags(params: PublishDiagnosticsParams): void { + // Before sending new diagnostics, check if the file is still open. + if (documents.get(params.uri)) { + connection.sendDiagnostics(params); + } else { + connection.sendDiagnostics({ uri: params.uri, diagnostics: [] }); + } + } + +=cut`; + + const expected = `\ +\`\`\`perl + async function getWorkspaceFoldersSafe(): Promise { + try { + const workspaceFolders = await connection.workspace.getWorkspaceFolders(); + if (!workspaceFolders) { + return []; + } else { + return workspaceFolders; + } + } catch (error) { + return []; + } + } + + function sendDiags(params: PublishDiagnosticsParams): void { + // Before sending new diagnostics, check if the file is still open. + if (documents.get(params.uri)) { + connection.sendDiagnostics(params); + } else { + connection.sendDiagnostics({ uri: params.uri, diagnostics: [] }); + } + } +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("mixed verbatim and ordinary paragraphs", () => { + const fileContents = `\ +=pod + +Lorem ipsum dolor sit amet: + + Consectetur adipiscing elit. + +Integer purus nisi: + + Egestas et imperdiet sit amet, interdum ut nisl. + + Sed fringilla placerat nulla, et viverra purus ultricies sit amet. + +Vestibulum vel diam venenatis. +Feugiat ipsum nec. 
+ +Aliquam erat: + + Maecenas dapibus arcu odio, ac dictum mauris cursus quis. + + Donec facilisis ex at nisi dictum, ac faucibus est elementum. + + Mauris sit amet pretium lacus. Nunc sollicitudin erat + mattis lectus dictum ultricies. + +=cut +`; + + const expected = `\ +Lorem ipsum dolor sit amet: + +\`\`\`perl + Consectetur adipiscing elit. +\`\`\` + +Integer purus nisi: + +\`\`\`perl + Egestas et imperdiet sit amet, interdum ut nisl. + + Sed fringilla placerat nulla, et viverra purus ultricies sit amet. +\`\`\` + +Vestibulum vel diam venenatis. +Feugiat ipsum nec. + +Aliquam erat: + +\`\`\`perl + Maecenas dapibus arcu odio, ac dictum mauris cursus quis. + + Donec facilisis ex at nisi dictum, ac faucibus est elementum. + + Mauris sit amet pretium lacus. Nunc sollicitudin erat + mattis lectus dictum ultricies. +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + // headers in markdown start at level 3, but do not exceed level 6 + test("pod headers to markdown headers", () => { + const fileContents = `\ +=pod + +=head1 HEAD ONE + +=head2 HEAD TWO + +=head3 HEAD THREE + +=head4 HEAD FOUR + +=head5 HEAD FIVE + +=head6 HEAD SIX + +=head7 IGNORED HEADER, NOT CONVERTED :) + +=cut +`; + + const expected = `\ +### HEAD ONE + +#### HEAD TWO + +##### HEAD THREE + +###### HEAD FOUR + +###### HEAD FIVE + +###### HEAD SIX +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); +}); + +// NOTE: POD doesn't allow many of the following list cases and places restrictions +// on which kinds of consecutive `=item` paragraphs are allowed, for example. +// +// We're being explicitly lax here and don't conform to the spec for simplicity's +// sake. Being 100% compliant isn't really necessary anyways, because this isn't +// supposed to be a full-fledged POD-to-$FORMAT converter; it should just be sufficient +// for displaying hover documentation. 
+// +// See `man perlpodspec` or this page for more information: +// https://perldoc.perl.org/perlpodspec#About-=over...=back-Regions +describe("pod lists to markdown lists", () => { + const parser = new RawPodParser(); + const processor = new PodProcessor(); + const converter = new PodToMarkdownConverter(); + + const podToMd = (fileContents: string): string => { + let parseRes = parser.parse(fileContents); + let processRes = processor.process(parseRes); + + return converter.convert(processRes); + }; + + // The POD spec doesn't really specify whether `=item` paragraphs are + // allowed outside of `=over ... =back` blocks or not, so we'll just allow + // them. + test("freestanding pod list to markdown list", () => { + const fileContents = `\ +=pod + +=head1 Unordered List + +=item * + +Foo. + +=item + +Bar. + +=item * + +Baz. + +=head1 Ordered List + +=item 1. + +Foo. + +=item 2. + +Bar. + +=item 3. + +Baz. + +=head1 Unordered List From Items With Text + +=item Foo. + +=item Bar. + +=item Baz. + +=cut`; + + const expected = `\ +### Unordered List + +- Foo. +- Bar. +- Baz. + +### Ordered List + +1. Foo. +2. Bar. +3. Baz. + +### Unordered List From Items With Text + +- Foo. +- Bar. +- Baz. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("pod list in over block to indented markdown list", () => { + const fileContents = `\ +=pod + +=head1 Nested Lists + +=over + +=item * + +Foo. + +=item + +Bar. + +=over + +=item + +Baz. + +=item * + +Qux. + +=back + +=back + +=over + +=item 1. + +Foo. + +=item 2. + +Bar. + +=over + +=item 3. + +Baz. + +=item 4. + +Qux. + +=back + +=back + +=over + +=item Foo. + +=item Bar. + +=over + +=item Baz. + +=item Qux. + +=back + +=back + +=cut`; + + const expected = `\ +### Nested Lists + +- Foo. +- Bar. + - Baz. + - Qux. + +1. Foo. +2. Bar. + 3. Baz. + 4. Qux. + +- Foo. +- Bar. + - Baz. + - Qux. 
+`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("nested lists with varying indentation levels", () => { + const fileContents = `\ +=pod + +=over + +=item * foo + +=over 2 + +=item * bar + +=over 3 + +=item * baz + +=back + +=back + +=back + + +=over + +=item 1. foo + +=over 2 + +=item 2. bar + +=over 3 + +=item 3. baz + +=back + +=back + +=back + +=cut +`; + + const expected = `\ +- foo + - bar + - baz + +1. foo + 2. bar + 3. baz +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("ordered pod lists to ordered markdown lists", () => { + const fileContents = `\ +=pod + +=over 2 + +=item 1. + +This list is ordered. + +=item 2. + +According to the spec, each ordered list must start at number 1. + +=item 3. + +... and also precede in order, without skipping a number. + +=item 4 + +Everything's fine here. We may skip (forget) the dot. + +=item 5. + +Multiple +lines +are + indented +correctly. + +=back + +=over 2 + +=item 42. + +However, we avoid enforcing this ordering, because it makes things easier. + +=item 666 + +We are beyond feeble ordering. + +=item 100. + +Beholden to none. + +=back + +=cut +`; + + const expected = `\ +1. This list is ordered. +2. According to the spec, each ordered list must start at number 1. +3. ... and also precede in order, without skipping a number. +4. Everything's fine here. We may skip (forget) the dot. +5. Multiple + lines + are + indented + correctly. + +42. However, we avoid enforcing this ordering, because it makes things easier. +666. We are beyond feeble ordering. +100. Beholden to none. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("strange list items", () => { + const fileContents = `\ +=pod + +The POD spec only allows certain command paragraphs to appear in an over-back block. + +=over + +=item + +=over + +But we can nest things, because it all handles the same anyways. 
+ +=back + +=item + + Verbatim paragraphs + are put into a neat + code block in markdown + though. + + That's fine and on spec. + + The code block even has indentation, oh my gosh. + +=item + +=item + +The item above is empty. Shouldn't be possible, but we also allow it. + +=item + +Note: We don't allow headers though. That's on spec. + +=over + +=item + +But it doesn't matter how deep you nest... + +=item + +=over + +=item You can always do weird things that conformant POD doesn't allow. + +=encoding utf-8 + +=item Encodings are ignored, for now. + +=foobar foo +bar +baz + +=item So are unknown command paragraphs. + +=back + +=back + +=back + +=cut`; + + const expected = `\ +The POD spec only allows certain command paragraphs to appear in an over-back block. + +- But we can nest things, because it all handles the same anyways. +- \`\`\`perl + Verbatim paragraphs + are put into a neat + code block in markdown + though. + + That's fine and on spec. + + The code block even has indentation, oh my gosh. + \`\`\` +- +- The item above is empty. Shouldn't be possible, but we also allow it. +- Note: We don't allow headers though. That's on spec. + - But it doesn't matter how deep you nest... + - - You can always do weird things that conformant POD doesn't allow. + - Encodings are ignored, for now. + - So are unknown command paragraphs. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("mixed list types to mixed markdown lists", () => { + const fileContents = `\ +=pod + +=item Freestanding list items like this one aren't explicitly specified. + +=item * + +So we'll just allow them. + +=item 42. + +We're even throwing in an "ordered" list item here. + +=cut + + + +=pod + +=over 2 + +=item This applies to over-back blocks as well, by the way. + +=item 10. + +We can do whatever we want, because conforming to the spec here would +be needlessly complex. + +=item It's not like +markdown cares either. +(Does it actually, though?) 
+ +=back + +=cut +`; + + const expected = `\ +- Freestanding list items like this one aren't explicitly specified. +- So we'll just allow them. +42. We're even throwing in an "ordered" list item here. +- This applies to over-back blocks as well, by the way. +10. We can do whatever we want, because conforming to the spec here would + be needlessly complex. +- It's not like + markdown cares either. + (Does it actually, though?) +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("single pod list items between paragraphs to markdown", () => { + const fileContents = `\ +=pod + +There should be an empty line after this ordinary paragraph. + +After this one as well. + +=item This item is followed by an empty line. + +Hello, I'm an ordinary paragraph, and I'm followed by an empty line. + +Another one follows after this paragraph. + +=head3 Let's interleave more! + +=item * + +Item followed by empty line. + +Empty line after me. + +=item Item followed by empty line. + +Empty line after me. + +=item 42. + +Item followed by empty line. + +Empty line after me, then EOF. + +=cut +`; + + const expected = `\ +There should be an empty line after this ordinary paragraph. + +After this one as well. + +- This item is followed by an empty line. + +Hello, I'm an ordinary paragraph, and I'm followed by an empty line. + +Another one follows after this paragraph. + +##### Let's interleave more! + +- Item followed by empty line. + +Empty line after me. + +- Item followed by empty line. + +Empty line after me. + +42. Item followed by empty line. + +Empty line after me, then EOF. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); +}); + +describe("pod data blocks to markdown", () => { + test("single data block to markdown code block", () => { + const fileContents = `\ +=pod + +=begin some-data ...with extra parameters that get ignored + +Ordinary paragraphs... + + and verbatim paragraphs... 
+ +just get parsed and internally converted to "data paragraphs" (which is +what the spec calls them). + + + +Multiple line breaks aren't preserved, though. Not sure if this matters, +but it makes things simpler. + +=end some-data + +=cut +`; + + const expected = `\ + +Ordinary paragraphs... + + and verbatim paragraphs... + +just get parsed and internally converted to "data paragraphs" (which is +what the spec calls them). + +Multiple line breaks aren't preserved, though. Not sure if this matters, +but it makes things simpler. + +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("HTML data block to markdown HTML code block", () => { + const fileContents = `\ +=pod + +=begin html + + + + Hello World! + + +

Hello World!

+

My purpose is to be a test case. Please free me.

+ + + +=end html + +=cut`; + + const expected = `\ +\`\`\`html + + + Hello World! + + +

Hello World!

+

My purpose is to be a test case. Please free me.

+ + +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("nested data blocks to nested markdown code blocks", () => { + const fileContents = `\ +=pod + +=begin foo + +Yeah, nesting is possible. + +=begin bar + +Because that's what the spec wants. + +And because it wasn't that hard to implement. + +=begin html + + + + +

Even if...

+ + + +=end html + +...this looks really weird. + +=begin html + + + + +

And out of place.

+ + + +=begin html + + + + +

Like genuinely weird.

+ + + +=end html + +=end html + +=end bar + +But hey, we can handle it. + +=end foo + +=cut +`; + + const expected = `\ + +Yeah, nesting is possible. + + +Because that's what the spec wants. + +And because it wasn't that hard to implement. + +\`\`\`html + + + +

Even if...

+ + +\`\`\` + +...this looks really weird. + +\`\`\`html + + + +

And out of place.

+ + +\`\`\` +\`\`\`html + + + +

Like genuinely weird.

+ + +\`\`\` +\`\`\`html +\`\`\` + + + +But hey, we can handle it. + +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("single normal data block to markdown", () => { + const fileContents = `\ +=pod + +=begin :foo + +This stuff in here gets treated as regular POD. + +=head3 Including commands. + +=over 3.5 + +=item Not gonna test this too thoroughly. + +=item 42. + +Because this isn't handled in any special manner. + +=item + +It really isn't. + +=back + + So yeah. The block above doesn't exist in Markdown at all. + + You won't even know it's there. + +=end :foo + +=cut +`; + + const expected = `\ +This stuff in here gets treated as regular POD. + +##### Including commands. + +- Not gonna test this too thoroughly. +42. Because this isn't handled in any special manner. +- It really isn't. + +\`\`\`perl + So yeah. The block above doesn't exist in Markdown at all. + + You won't even know it's there. +\`\`\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("nested normal data blocks to markdown", () => { + const fileContents = `\ +=pod + +=begin :foo + +=head1 Foo. + +=begin :bar + +=head2 Bar. + +Lorem ipsum dolor sit amet. + +=end :bar + +Consectetur adipiscing elit. + +=end :foo + +=cut +`; + + const expected = `\ +### Foo. + +#### Bar. + +Lorem ipsum dolor sit amet. + +Consectetur adipiscing elit. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("nested mixed data blocks to markdown", () => { + const fileContents = `\ +=pod + +=begin data + +This is where things get interesting. + +=begin html + +

Because the spec allows nesting data paragraphs ...

+ +=begin :no-data-here-lolz + +... with non-data data paragraphs. + +=head1 So it's possible to put headers in here, for example. + +=begin inner-data + +Also, you can add more begin-end blocks deeper inside all of this. + +Surprisingly, this wasn't too hard to support. + +=end inner-data + +=end :no-data-here-lolz + +

And then you can just continue with your HTML or something.

+ +=end html + +It's... odd, to say the least. + +=end data + +=cut +`; + + const expected = `\ + +This is where things get interesting. + +\`\`\`html +

Because the spec allows nesting data paragraphs ...

+\`\`\` +... with non-data data paragraphs. + +### So it's possible to put headers in here, for example. + + +Also, you can add more begin-end blocks deeper inside all of this. + +Surprisingly, this wasn't too hard to support. + +\`\`\`html +

And then you can just continue with your HTML or something.

+\`\`\` + +It's... odd, to say the least. + +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); +}); + +describe("markdown inline formatting", () => { + test("pod bold to markdown bold", () => { + const fileContents = `\ +=pod + +=head1 B + +B<< This paragraph is in bold. >> + + B + +=item B<<< This item is in bold. >>> But not here. B<< Here we go again. >> + +=item * + +B B B + +=item 42. + +And B<<<<< this >>>>> one too. + +=cut +`; + + const expected = `\ +### **Bold header.** + +**This paragraph is in bold.** + +\`\`\`perl + B +\`\`\` + +- **This item is in bold.** But not here. **Here we go again.** +- **So is this one.** **Twice.** **Thrice.** +42. And **this** one too. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("pod italics to markdown bold", () => { + const fileContents = `\ +=pod + +=head1 I
+ +I<< This paragraph is in italics. >> + + I + +=item I<<< This item is in italics. >>> But not here. I<< Here we go again. >> + +=item * + +I I I + +=item 42. + +And I<<<<< this >>>>> one too. + +=cut +`; + + const expected = `\ +### *Header in italics.* + +*This paragraph is in italics.* + +\`\`\`perl + I +\`\`\` + +- *This item is in italics.* But not here. *Here we go again.* +- *So is this one.* *Twice.* *Thrice.* +42. And *this* one too. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("pod bold italics to markdown bold", () => { + const fileContents = `\ +=pod + +=head1 I> B> + +I<< B<< This paragraph is in bold italics. >> >> + + B> + +=item B<< I<<< This item is in bold italics. >>> >> But not here. B<< I<< Here it is again. >> >> + +=item * + +B>> Not here. B>> + +=item 42. + +And I>> one too. + +=cut +`; + + const expected = `\ +### ***Header in bold italics.*** ***In two different ways.*** + +***This paragraph is in bold italics.*** + +\`\`\`perl + B> +\`\`\` + +- ***This item is in bold italics.*** But not here. ***Here it is again.*** +- ***So is this one.*** Not here. ***And we're back.*** +42. And ***this*** one too. +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); + + test("pod inline code to markdown inline code", () => { + const fileContents = `\ +=pod + +=head1 C Doesn't matter C. + +C<< This paragraph is inline code. >> + + C + +=item C<<< This item is inline code. >>> + +=item * + +C But not here. C + +=item 42. + +C C C + +=cut +`; + + const expected = `\ +### \`Headers allow inline code.\` Doesn't matter \`where\`. + +\`This paragraph is inline code.\` + +\`\`\`perl + C +\`\`\` + +- \`This item is inline code.\` +- \`So is this one.\` But not here. \`Here it's code again.\` +42. 
\`Same goes for this one.\` \`Twice.\` \`Thrice.\` +`; + + const result = podToMd(fileContents); + + expect(result).toEqual(expected); + }); +}); diff --git a/server/src/pod.ts b/server/src/pod.ts index e9be6ab..210482e 100644 --- a/server/src/pod.ts +++ b/server/src/pod.ts @@ -3,98 +3,1944 @@ import { PerlDocument, PerlElem, PerlSymbolKind } from "./types"; import Uri from "vscode-uri"; import { isFile } from "./utils"; -export async function getPod(elem: PerlElem, perlDoc: PerlDocument, modMap: Map): Promise { - // File may not exists. Return nothing if it doesn't +/** A paragraph whose first line matches `^[ \t]`. + * + * May also be *inside* `=begin [formatname]` and `=end [formatname]` commands, + * as long as [formatname] starts with a colon `:`. + */ +export interface VerbatimParagraph { + kind: "verbatim"; + lineNo?: number; + lines: Array; +} + +/** Not a CommandParagraph and not a VerbatimParagraph. Basically just + * arbitrary text. + * + * May also be *inside* `=begin [formatname]` and `=end [formatname]` commands, + * as long as [formatname] starts with a colon `:`. + */ +export interface OrdinaryParagraph { + kind: "ordinary"; + lineNo?: number; + lines: Array; +} + +/** Contents *inside* `=begin [formatname] [parameter]` and `=end [formatname]` + * commands, as long as [formatname] does *not* start with a colon `:`. + */ +export interface DataParagraph { + kind: "data"; + lineNo?: number; + lines: Array; +} + +// Concrete command paragraphs +// +// Note: `=pod` and `=cut` aren't typed here, as they're already represented +// by a `PodBlock`. + +export const enum HeaderLevel { + One = 1, + Two, + Three, + Four, + Five, + Six, +} + +/** Represents `=head1` until `=head6`. + */ +export interface HeaderParagraph { + kind: "head"; + lineNo?: number; + level: HeaderLevel; + contents: string; +} + +/** Represents `=over`. 
+ */ +export interface OverParagraph { + kind: "over"; + lineNo?: number; + level: number; // non-zero and `4` by default +} + +/** Represents `=back`. + */ +export interface BackParagraph { + kind: "back"; + lineNo?: number; +} + +/** Represents `=item *` or a plain `=item`. + * May be followed by text. `lines` is left unset for an item without text. + * + * NOTE(review): the interface name is misspelled ("Unordererd" instead of + * "Unordered"). It is exported, so renaming it means updating every + * reference at the same time. + */ +export interface UnordererdItemParagraph { + kind: "unordereditem"; + lineNo?: number; + lines?: Array; +} + +/** `=item N` or `=item N.` where `N` is any whole number. + * May be followed by text. + */ +export interface OrderedItemParagraph { + kind: "ordereditem"; + num: number; + lineNo?: number; + lines?: Array; +} + +/** Represents `=encoding [encodingname]` - currently parsed, but unused. + */ +export interface EncodingParagraph { + kind: "encoding"; + lineNo?: number; + name: string; +} + +/** Represents `=begin [formatname] [parameter]`. + */ +export interface BeginParagraph { + kind: "begin"; + lineNo?: number; + formatname: string; + parameter: string; +} + +/** Represents `=end [formatname]`. + */ +export interface EndParagraph { + kind: "end"; + lineNo?: number; + formatname: string; +} + +/** Represents `=for [formatname] [contents]`. + * If `formatname` begins with a colon `:`, `contents` will be interpreted + * as an ordinary paragraph. + * + * If it doesn't begin with a colon, `contents` will be interpreted as a data + * paragraph. + */ +export interface ForParagraph { + kind: "for"; + lineNo?: number; + formatname: string; + lines: Array; +} + +/** Yielded if none of the other command paragraphs match. 
+ */ +export interface UnknownCommandParagraph { + kind: "unknown"; + lineNo?: number; + cmd: string; + contents: string; +} + +/** Union of all command paragraph interfaces. + */ +export type CommandParagraph = HeaderParagraph + | OverParagraph + | BackParagraph + | UnordererdItemParagraph + | OrderedItemParagraph + | EncodingParagraph + | BeginParagraph + | EndParagraph + | ForParagraph + | UnknownCommandParagraph; + +/** Any single paragraph: a command, verbatim, ordinary or data paragraph. + */ +export type PodParagraph = CommandParagraph + | VerbatimParagraph + | OrdinaryParagraph + | DataParagraph; + +/** Represents the "raw" paragraphs between `=pod ... =cut` commands. + * "Raw" here means that all kinds of paragraphs can appear anywhere and in any + * order -- no checks (beyond parsing the paragraphs correctly) are performed. + * + * During the parser's second pass, the paragraphs in this block are then + * checked for their validity, e.g. whether `=over` is followed by a `=back` + * and so on, before processing the block into a `PodBlock`. + * + * Repeated occurrences of `=pod` and `=cut` commands are ignored when this + * block is being constructed. + */ +export interface RawPodBlock { + kind: "rawpodblock"; + lineNo?: number; + paragraphs: Array; +} + +export type PodBlockContent = VerbatimParagraph + | OrdinaryParagraph + | HeaderParagraph + | UnordererdItemParagraph + | OrderedItemParagraph + | EncodingParagraph + | UnknownCommandParagraph + | OverBlock + | DataBlock + | NormalDataBlock; + +/** Represents a list of paragraphs and other blocks between `=pod ... =cut` commands. + * + * This kind of block is created by processing a `RawPodBlock` during the parser's + * second pass. + */ +export interface PodBlock { + kind: "podblock"; + lineNo?: number; + paragraphs: Array; +} + +/** NOTE(review): this union includes `HeaderParagraph`, although the + * documentation of `OverBlock` states that an over block cannot contain + * headers -- confirm which one is intended. + */ +export type OverBlockContent = VerbatimParagraph + | OrdinaryParagraph + | HeaderParagraph + | UnordererdItemParagraph + | OrderedItemParagraph + | EncodingParagraph + | UnknownCommandParagraph + | OverBlock + | DataBlock + | NormalDataBlock; + +/** Represents an `=over` ... `=back` block. 
+ * - Cannot be empty + * - Cannot contain headers (HeaderParagraphs) + */ +export interface OverBlock { + kind: "overblock"; + lineNo?: number; + level: number; // non-zero and `4` by default + paragraphs: Array; +} + +export type DataBlockContent = DataParagraph + | DataBlock + | NormalDataBlock; + +/** Represents a `=begin [formatname] [parameter]` ... `=end [formatname]` block. + * `formatname` must not begin with a colon `:`. + * + * This may also represent a `=for [formatname] text...` command. + * + * Other command paragraphs may *not* appear inside this type of block. + * Verbatim and ordinary paragraphs become data paragraphs. + */ +export interface DataBlock { + kind: "datablock"; + lineNo?: number; + formatname: string; + parameter: string; + paragraphs: Array; +} + +/** Like a `DataBlock`, but `formatname` begins with a colon `:`. + * This means that the contents inside the `=begin ... =end` block are subject + * to normal processing. + */ +export interface NormalDataBlock { + kind: "normaldatablock"; + lineNo?: number; + formatname: string; + parameter: string; + paragraphs: Array; +} + +/** Represents a POD document which hasn't yet been processed further. + */ +export interface RawPodDocument { + kind: "rawpoddocument", + blocks: Array; +} + +/** A completely parsed and processed POD document. + */ +export interface PodDocument { + kind: "poddocument", + blocks: Array; +} + +/** Represents the iteration over lines of text. + * Doesn't actually conform to the iterator protocol. + * + * Is used as a helper structure for {@link RawPodParser}, allowing the parser + * to backtrack when trying to parse `=item` paragraphs without text followed + * by an ordinary paragraph. 
+ */ +class LinesIterator { + #lines: Array; + #currentLineNo: number; + #savedLineNumbers: Array; + + /** Splits `contents` into lines at `\n` or `\r\n` and starts before the + * first line. + */ + constructor(contents: string) { + this.#lines = contents.split(/\r?\n/); + this.#currentLineNo = 0; + this.#savedLineNumbers = []; + } + + /** Returns the next line and advances past it, or `undefined` once all + * lines have been consumed. + */ + next(): string | undefined { + if (this.#currentLineNo < this.#lines.length) { + return this.#lines[this.#currentLineNo++]; + } + + return undefined; + } + + /** Number of lines consumed so far, which equals the 1-based number of the + * line most recently returned by `next()` (`0` before the first call). + */ + currentLineNo(): number { + return this.#currentLineNo; + } + + /** Pushes the current position onto the stack of saved positions. + */ + save() { + this.#savedLineNumbers.push(this.#currentLineNo); + } + + /** Jumps back to the most recently saved position and removes it from the + * stack. Without a saved position this jumps to the very first line. + */ + rewind() { + this.#currentLineNo = this.#savedLineNumbers.pop() ?? 0; + } +} + +/** Tracks the state for parsing POD content from a file. + * See {@link parse} for more information. + */ +export class RawPodParser { + /** Line source of the current `parse` call. */ + #lineIter: LinesIterator = new LinesIterator(""); + /** Block currently being filled; `undefined` while outside of a POD block. */ + #currentBlock?: RawPodBlock = undefined; + /** Blocks completed so far during the current `parse` call. */ + #parsedBlocks: Array = []; + + /** Parses and returns POD content from the given file contents. + * Note that this returns a {@link RawPodDocument} on success, which contains + * POD content that hasn't been processed and checked for validity yet. + * This is done via the {@link PodProcessor}. + * + * A POD block is opened by any command paragraph other than `=cut` and + * closed by `=cut` or by the end of the input. Lines outside of a POD + * block are skipped. 
+ */ + parse(fileContents: string): RawPodDocument { + // Reset state + this.#lineIter = new LinesIterator(fileContents); + this.#currentBlock = undefined; + this.#parsedBlocks = []; + + let line: string | undefined; + while (true) { + line = this.#lineIter.next(); + + // EOF + if (line === undefined) { + break; + } + + // line is empty + if (line === "") { + continue; + } + + if (RawPodParser.#isCommandParagraph(line)) { + /* NOTE(review): `startsWith` also accepts longer command names that merely begin with `=cut` (the same holds for `=pod` below) -- confirm this is intended */ + if (line.startsWith("=cut")) { + if (this.#currentBlock !== undefined) { + this.#parsedBlocks.push(this.#currentBlock); + this.#currentBlock = undefined; + } + + // ignoring repeated `=cut`s here, because they don't really matter + + this.#skipUntilEmptyLine(); + continue; + } + + /* any other command paragraph implicitly opens a POD block */ + if (this.#currentBlock === undefined) { + this.#currentBlock = { + kind: "rawpodblock", + lineNo: this.#lineIter.currentLineNo(), + paragraphs: [] + }; + } + + if (line.startsWith("=pod")) { + this.#skipUntilEmptyLine(); + continue; + } + + // other command paragraphs + let paraResult = this.#tryParseCommand(line); + + // no need to skip to an empty line here, as that is handled for + // each paragraph in tryParseCommand + + if (paraResult) { + this.#currentBlock.paragraphs.push(paraResult); + } + + continue; + } + + /* non-command text outside of a POD block is skipped */ + if (this.#currentBlock === undefined) { + continue; + } + + if (RawPodParser.#isVerbatimParagraph(line)) { + let para = this.#parseVerbatim(line); + + this.#currentBlock.paragraphs.push(para); + continue; + } + + let para = this.#parseOrdinary(line); + this.#currentBlock.paragraphs.push(para); + } + + // allow file to end without needing a matching =cut + if (this.#currentBlock !== undefined) { + this.#parsedBlocks.push(this.#currentBlock); + this.#currentBlock = undefined; + } + + return { + kind: "rawpoddocument", + blocks: this.#parsedBlocks, + }; + } + + /** Whether `line` starts a command paragraph, i.e. begins with `=` + * followed by an ASCII letter. + */ + static #isCommandParagraph(line: string): boolean { + return /^=[a-zA-Z]/.test(line); + } + + /** Whether `line` starts a verbatim paragraph, i.e. begins with a space + * or a tab. + */ + static #isVerbatimParagraph(line: string): boolean { + return /^[ \t]/.test(line); + } + + /** Whether `line` starts neither a command nor a verbatim paragraph. + */ + static 
#isOrdinaryParagraph(line: string): boolean { + return !(RawPodParser.#isCommandParagraph(line) || RawPodParser.#isVerbatimParagraph(line)); + } + + #skipUntilEmptyLine(): void { + let line: string | undefined; + + while (true) { + line = this.#lineIter.next(); + + if (!line) { + return; + } + } + } + + #appendNextLineUntilEmptyLine( + content: string, + trimOpts: { trimStart?: boolean, trimEnd?: boolean } = {} + ): string { + let line: string | undefined; + + while (line = this.#lineIter.next()) { + if (trimOpts.trimStart && trimOpts.trimEnd) { + line = line.trim(); + } else if (trimOpts.trimStart) { + line = line.trimStart(); + } else if (trimOpts.trimEnd) { + line = line.trimEnd(); + } + + content += " " + line; + } + + return content; + } + + + static #parsedLevelToHeaderLevel(matchedLevel: string): HeaderLevel | undefined { + const level = parseInt(matchedLevel); + + if (isNaN(level)) { + return; + } + + const levels = [ + undefined, + HeaderLevel.One, + HeaderLevel.Two, + HeaderLevel.Three, + HeaderLevel.Four, + HeaderLevel.Five, + HeaderLevel.Six, + ] as const; + + return levels[level]; + } + + /** Tries to parse a command paragraph. + * The passed `line` is expected to have matched `/^=[a-zA-Z]/` beforehand. 
+ */ + #tryParseCommand(line: string): PodParagraph | undefined { + line = line.trimEnd(); + const lineNo = this.#lineIter.currentLineNo(); + + let matchResult; + + // =head[1-6] + matchResult = [...line.matchAll(/^=head(?[1-6])(\s+(?.*))?/g)][0]; + if (matchResult !== undefined) { + // Casts here are fine, because we only match expected level in regex + const matchedLevel = matchResult.groups?.level as string; + const level = RawPodParser.#parsedLevelToHeaderLevel(matchedLevel) as HeaderLevel; + + let contents = matchResult.groups?.contents || ""; + contents = this.#appendNextLineUntilEmptyLine( + contents, { trimStart: true, trimEnd: true } + ); + + let para: HeaderParagraph = { + kind: "head", + lineNo: lineNo, + contents: contents, + level: level, + }; + + return para; + } + + // =item + // =item\s+* + // =item\s+\d+\.? + // =item\s+[text...] + matchResult = [...line.matchAll(/^=item(\s+((?\*)\s*|((?\d+)\.?\s*))?)?(?.*)?/g)][0]; + if (matchResult !== undefined) { + // =item * + let asterisk = matchResult.groups?.asterisk; + if (asterisk) { + let text = matchResult.groups?.text; + + let para: UnordererdItemParagraph = { + kind: "unordereditem", + lineNo: lineNo, + }; + + if (text) { + this.#appendNextLineUntilEmptyLine(text, { trimStart: true, trimEnd: true }); + para.lines = [text]; + + return para; + } + + this.#skipUntilEmptyLine(); + let ordinaryPara = this.#parseNextOrdinaryOrRewind(); + if (ordinaryPara) { + para.lines = ordinaryPara.lines; + } + + return para; + } + + // =item N. 
+ let num = matchResult.groups?.num; + if (num) { + let text = matchResult.groups?.text; + + let para: OrderedItemParagraph = { + kind: "ordereditem", + num: parseInt(num), + lineNo: lineNo, + }; + + if (text) { + this.#appendNextLineUntilEmptyLine(text, { trimStart: true, trimEnd: true }); + para.lines = [text]; + + return para; + } + + this.#skipUntilEmptyLine(); + let ordinaryPara = this.#parseNextOrdinaryOrRewind(); + if (ordinaryPara) { + para.lines = ordinaryPara.lines; + } + + return para; + } + + // =item Lorem ipsum dolor ... + let text = matchResult.groups?.text; + if (text) { + let currentLine: string | undefined = text; + let lines: Array = []; + + while (currentLine) { + lines.push(currentLine.trim()); + + currentLine = this.#lineIter.next(); + } + + let para: UnordererdItemParagraph = { + kind: "unordereditem", + lineNo: lineNo, + lines: lines, + }; + + return para; + } + + // =item + let para: UnordererdItemParagraph = { + kind: "unordereditem", + lineNo: lineNo, + }; + + this.#skipUntilEmptyLine(); + let ordinaryPara = this.#parseNextOrdinaryOrRewind(); + if (ordinaryPara) { + para.lines = ordinaryPara.lines; + } + + return para; + } + + // =encoding + matchResult = [...line.matchAll(/^=encoding\s+(?\S+)/g)][0]; + if (matchResult !== undefined) { + let name = matchResult.groups?.name || ""; + + this.#skipUntilEmptyLine(); + + let para: EncodingParagraph = { + kind: "encoding", + lineNo: lineNo, + name: name, + }; + + return para; + } + + // =over + matchResult = [...line.matchAll(/^=over(\s+(?\d+(\.\d*)?))?/g)][0]; + if (matchResult !== undefined) { + let matchedLevel = matchResult.groups?.num; + + let level: number = 0; + + if (matchedLevel !== undefined) { + level = parseFloat(matchedLevel); + } + + const defaultOverLevel = 4; + + level = level > 0 ? 
level : defaultOverLevel; + + this.#skipUntilEmptyLine(); + + let para: OverParagraph = { + kind: "over", + lineNo: lineNo, + level: level, + }; + + return para; + } + + // =back + if (line.startsWith("=back")) { + this.#skipUntilEmptyLine(); + + let para: BackParagraph = { + kind: "back", + lineNo: lineNo, + }; + + return para; + } + + // =begin + matchResult = [ + ...line.matchAll( + /^=begin(\s+(?:?[-a-zA-Z0-9_]+)(\s+(?.*))?)?/g + ) + ][0]; + if (matchResult !== undefined) { + let formatname = matchResult.groups?.formatname ?? ""; + + let parameter = matchResult.groups?.parameter || ""; + parameter = this.#appendNextLineUntilEmptyLine(parameter).trim(); + + let para: BeginParagraph = { + kind: "begin", + lineNo: lineNo, + formatname: formatname.trim(), + parameter: parameter, + }; + + return para; + } + + // =end + matchResult = [...line.matchAll(/^=end(\s+(?:?[-a-zA-Z0-9_]+))?/g)][0]; + if (matchResult !== undefined) { + let formatname = matchResult.groups?.formatname ?? ""; + + this.#skipUntilEmptyLine(); + + let para: EndParagraph = { + kind: "end", + lineNo: lineNo, + formatname: formatname.trim(), + }; + + return para; + } + + // =for + matchResult = [ + ...line.matchAll(/^=for(\s+(?:?[-a-zA-Z0-9_]+)(\s+(?.*))?)?/g) + ][0]; + if (matchResult !== undefined) { + const formatname = matchResult.groups?.formatname ?? 
""; + + let contents = (matchResult.groups?.contents || "").trim(); + + // similar to parsing an ordinary or verbatim paragraph + let currentLine: string | undefined = contents; + let lines: Array = []; + + while (currentLine) { + lines.push(currentLine.trimEnd()); + + currentLine = this.#lineIter.next(); + } + + let para: ForParagraph = { + kind: "for", + lineNo: lineNo, + formatname: formatname.trim(), + lines: lines, + }; + + return para; + } + + // unknown command paragraph; just parse it so we can toss it later + matchResult = [...line.matchAll(/^=(?\S+)(\s+(?.*))?/g)][0]; + if (matchResult !== undefined) { + let contents = matchResult.groups?.contents || ""; + contents = this.#appendNextLineUntilEmptyLine(contents); + + let para: UnknownCommandParagraph = { + kind: "unknown", + lineNo: lineNo, + cmd: matchResult.groups?.cmd as string, + contents: contents, + }; + + return para; + } + } + + /** Parses a verbatim paragraph. + * The passed `line` is expected to have matched `/^[ \t]/` beforehand. + */ + #parseVerbatim(line: string): VerbatimParagraph { + let currentLine: string | undefined = line; + const lineNo = this.#lineIter.currentLineNo(); + + let lines: Array = []; + + // breaks if undefined or empty line + while (currentLine) { + lines.push(currentLine.trimEnd()); + + currentLine = this.#lineIter.next(); + } + + return { + kind: "verbatim", + lineNo: lineNo, + lines: lines, + }; + } + + /** Parses an ordinary paragraph. + * The passed `line` is expected to have matched neither`/^=[a-zA-Z]` or + * `/^[ \t]` beforehand. 
+ */ + #parseOrdinary(line: string): OrdinaryParagraph { + let currentLine: string | undefined = line; + const lineNo = this.#lineIter.currentLineNo(); + + let lines: Array = []; + + // breaks if undefined or empty line + while (currentLine) { + lines.push(currentLine); + + currentLine = this.#lineIter.next(); + } + + return { + kind: "ordinary", + lineNo: lineNo, + lines: lines, + }; + } + + /** Tries to parse the next paragraph as ordinary paragraph. + * If the next paragraph is of some other type, rewinds the internal line + * iterator back to before it started parsing and returns `undefined`. + */ + #parseNextOrdinaryOrRewind(): OrdinaryParagraph | undefined { + this.#lineIter.save(); + + let line: string | undefined; + + // Advance until ordinary paragraph and return that, or rewind + while (true) { + line = this.#lineIter.next(); + + // EOF + if (line === undefined) { + return; + } + + // line is empty + if (line === "") { + continue; + } + + if (RawPodParser.#isOrdinaryParagraph(line)) { + return this.#parseOrdinary(line); + } + + // Encountered something else, so rewind and return + this.#lineIter.rewind(); + return; + } + } +} + +/** Tracks the state for processing a {@link RawPodDocument} into a proper + * {@link PodDocument}. + */ +export class PodProcessor { + #blockIter: Generator = this.#makeBlockIter([]); + #processedBlocks: Array = []; + + /** Processes a {@link RawPodDocument} into a proper {@link PodDocument}. + * + * This checks whether the given raw document is valid (conforms as much to + * the POD specification as possible) and also merges certain paragraphs for + * ease of use. 
+ */ + process(document: RawPodDocument): PodDocument { + // Reset state + this.#blockIter = this.#makeBlockIter(document.blocks); + this.#processedBlocks = []; + + const blockProcessor = new PodBlockProcessor(); + + let currentBlock = this.#getNextBlock(); + while (currentBlock) { + const processedBlockResult = blockProcessor.process(currentBlock); + + this.#processedBlocks.push(processedBlockResult); + currentBlock = this.#getNextBlock(); + } + + return { + kind: "poddocument", + blocks: this.#processedBlocks, + }; + } + + *#makeBlockIter(rawBlocks: Array) { + yield* rawBlocks; + } + + #getNextBlock(): RawPodBlock | undefined { + let { value, done } = this.#blockIter.next(); + + if (done || value === undefined) { + return; + } + + return value; + } +} + +/** Inner workings of {@link PodProcessor}. */ +class PodBlockProcessor { + #paragraphIter: Generator = this.#makeParagraphIter([]); + #podBlock: PodBlock = { kind: "podblock", paragraphs: [] }; + + *#makeParagraphIter(paragraphs: Array) { + yield* paragraphs; + } + + #getNextParagraph(): PodParagraph | undefined { + let { value, done } = this.#paragraphIter.next(); + + if (done || value === undefined) { + return; + } + + return value; + } + + process(block: RawPodBlock): PodBlock { + // Reset state + this.#paragraphIter = this.#makeParagraphIter(block.paragraphs); + this.#podBlock = { kind: "podblock", paragraphs: [] }; + + let para: PodParagraph | undefined; + let previousPara: PodParagraph | undefined; + + while (true) { + previousPara = para; + para = this.#getNextParagraph(); + + if (!para) { + break; + } + + switch (para.kind) { + case "verbatim": + const lastPara = this.#podBlock.paragraphs[this.#podBlock.paragraphs.length - 1]; + + // Merge verbatim paragraphs for easier conversion later. 
+ if (lastPara && lastPara.kind === "verbatim") { + let mergedLines = [...lastPara.lines, "", ...para.lines]; + + let mergedVerbatim: VerbatimParagraph = { + kind: "verbatim", + lineNo: lastPara.lineNo, + lines: mergedLines, + }; + + this.#podBlock.paragraphs[this.#podBlock.paragraphs.length - 1] = mergedVerbatim; + break; + } + + this.#podBlock.paragraphs.push(para); + break; + case "ordinary": + case "unordereditem": + case "ordereditem": + case "head": + this.#podBlock.paragraphs.push(para); + break; + case "over": + this.#podBlock.paragraphs.push(this.#enterOverBlock(para)); + break; + case "begin": + this.#podBlock.paragraphs.push(this.#enterDataBlock(para)); + break; + case "for": + this.#podBlock.paragraphs.push(this.#buildDataBlockFromForPara(para)); + break; + case "data": // should not be possible to appear here, so ignore it + case "back": // doesn't have a matching =over, so ignore it + case "end": // doesn't have matching =begin, so ignore it + case "encoding": // ignored + case "unknown": // ignored + break; + default: + const _exhaustiveCheck: never = para; + return _exhaustiveCheck; + } + } + + return this.#podBlock; + } + + // `level` must be non-zero. + #enterOverBlock(paragraph: OverParagraph): OverBlock { + let overBlock: OverBlock = { + kind: "overblock", + lineNo: paragraph.lineNo, + level: paragraph.level, + paragraphs: [], + }; + + let isProcessingBlock = true; // used to exit the loop from within switch + let para: PodParagraph | undefined; + + while (isProcessingBlock) { + para = this.#getNextParagraph(); + + if (para === undefined) { + isProcessingBlock = false; + break; + } + + switch (para.kind) { + case "verbatim": + const lastPara = overBlock.paragraphs[overBlock.paragraphs.length - 1]; + + // Merge verbatim paragraphs for easier conversion later. 
+ if (lastPara && lastPara.kind === "verbatim") { + let mergedLines = [...lastPara.lines, "", ...para.lines]; + + let mergedVerbatim: VerbatimParagraph = { + kind: "verbatim", + lineNo: lastPara.lineNo, + lines: mergedLines, + }; + + overBlock.paragraphs[overBlock.paragraphs.length - 1] = mergedVerbatim; + break; + } + + overBlock.paragraphs.push(para); + break; + case "head": // technically not allowed by spec, but we tolerate it anyways + case "ordinary": + case "unordereditem": + case "ordereditem": + overBlock.paragraphs.push(para); + break; + case "over": + overBlock.paragraphs.push(this.#enterOverBlock(para)); + break; + case "back": + isProcessingBlock = false; + break; + case "begin": + overBlock.paragraphs.push(this.#enterDataBlock(para)); + break; + case "for": + overBlock.paragraphs.push(this.#buildDataBlockFromForPara(para)); + break; + case "data": // should not be possible to appear here, so ignore it + case "end": // doesn't have matching =begin, so ignore it + case "encoding": // ignored + case "unknown": // ignored + break; + default: + const _exhaustiveCheck: never = para; + return _exhaustiveCheck; + } + } + + return overBlock; + } + + #enterDataBlock(paragraph: BeginParagraph): DataBlock | NormalDataBlock { + if (paragraph.formatname.startsWith(":")) { + return this.#buildNormalDataBlock(paragraph); + } else { + return this.#buildDataBlock(paragraph); + } + } + + #buildDataBlock(paragraph: BeginParagraph): DataBlock { + let dataBlock: DataBlock = { + kind: "datablock", + formatname: paragraph.formatname, + parameter: paragraph.parameter, + paragraphs: [], + }; + + let isProcessingBlock = true; // used to exit the loop from within switch + let para: PodParagraph | undefined; + + while (isProcessingBlock) { + para = this.#getNextParagraph(); + + if (para === undefined) { + isProcessingBlock = false; + break; + } + + switch (para.kind) { + case "ordinary": + case "verbatim": + const lastPara = dataBlock.paragraphs[dataBlock.paragraphs.length - 1]; 
+ + // Ordinary and verbatim paragraphs are merged into the previous data paragraph. + if (lastPara && lastPara.kind === "data") { + let mergedLines = [...lastPara.lines, "", ...para.lines]; + + let mergedData: DataParagraph = { + kind: "data", + lineNo: lastPara.lineNo, + lines: mergedLines, + }; + + dataBlock.paragraphs[dataBlock.paragraphs.length - 1] = mergedData; + break; + } + + let dataPara: DataParagraph = { + kind: "data", + lines: para.lines, + }; + + dataBlock.paragraphs.push(dataPara); + + break; + case "data": + // Should not be appearing here, but since it's a data + // paragraph, just add it + dataBlock.paragraphs.push(para); + case "encoding": + case "unordereditem": + case "ordereditem": + case "head": + case "over": + case "back": + case "unknown": + // None of these paragraphs are allowed per the perlpodspec, + // so just ignore them + break; + case "begin": + dataBlock.paragraphs.push(this.#enterDataBlock(para)); + break; + case "end": + // Normally the formatname parameter of =begin and =end + // blocks must match, but we're staying fault-tolerant here + isProcessingBlock = false; + break; + case "for": + dataBlock.paragraphs.push(this.#buildDataBlockFromForPara(para)); + break; + default: + const _exhaustiveCheck: never = para; + return _exhaustiveCheck; + } + } + + return dataBlock; + } + + #buildNormalDataBlock(paragraph: BeginParagraph): NormalDataBlock { + let dataBlock: NormalDataBlock = { + kind: "normaldatablock", + formatname: paragraph.formatname, + parameter: paragraph.parameter, + paragraphs: [], + }; + + let isProcessingBlock = true; // used to exit the loop from within switch + let para: PodParagraph | undefined; + + while (isProcessingBlock) { + para = this.#getNextParagraph(); + + if (para === undefined) { + isProcessingBlock = false; + break; + } + + switch (para.kind) { + case "verbatim": + const lastPara = dataBlock.paragraphs[dataBlock.paragraphs.length - 1]; + + // Merge verbatim paragraphs for easier conversion later. 
+ if (lastPara && lastPara.kind === "verbatim") { + let mergedLines = [...lastPara.lines, "", ...para.lines]; + + let mergedVerbatim: VerbatimParagraph = { + kind: "verbatim", + lineNo: lastPara.lineNo, + lines: mergedLines, + }; + + dataBlock.paragraphs[dataBlock.paragraphs.length - 1] = mergedVerbatim; + break; + } + + dataBlock.paragraphs.push(para); + break; + case "ordinary": + case "unordereditem": + case "ordereditem": + case "head": + dataBlock.paragraphs.push(para); + break; + case "over": + dataBlock.paragraphs.push(this.#enterOverBlock(para)); + break; + case "begin": + dataBlock.paragraphs.push(this.#enterDataBlock(para)); + break; + case "end": + // Normally the formatname parameter of =begin and =end + // blocks must match, but we're staying fault-tolerant here + isProcessingBlock = false; + break; + case "for": + dataBlock.paragraphs.push(this.#buildDataBlockFromForPara(para)); + break; + case "data": // should not be possible to appear here, so ignore it + case "back": // doesn't have a matching =over, so ignore it + case "encoding": // ignored + case "unknown": // ignored + break; + default: + const _exhaustiveCheck: never = para; + return _exhaustiveCheck; + } + } + + return dataBlock; + } + + #buildDataBlockFromForPara(paragraph: ForParagraph): DataBlock | NormalDataBlock { + if (paragraph.formatname.startsWith(":")) { + let paragraphs: Array; + + if (paragraph.lines.length === 0) { + paragraphs = []; + } else { + paragraphs = [ + { + kind: "ordinary", + lines: paragraph.lines, + } + ]; + } + + return { + kind: "normaldatablock", + formatname: paragraph.formatname, + parameter: "", + paragraphs: paragraphs, + }; + } + + let paragraphs: Array; + if (paragraph.lines.length === 0) { + paragraphs = []; + } else { + paragraphs = [ + { + kind: "data", + lines: paragraph.lines, + } + ]; + } + + return { + kind: "datablock", + formatname: paragraph.formatname, + parameter: "", + paragraphs: paragraphs, + }; + } +} + +/** Tracks the state for converting a 
{@link PodDocument} or {@link PodBlock} + * into Markdown. + */ +export class PodToMarkdownConverter { + #blockContentIter: Generator = this.#makeBlockContentIter([]); + #overBlockIndentLevels: Array = []; + + /** Converts a {@link PodDocument} or {@link PodBlock} to Markdown. */ + convert(pod: PodDocument | PodBlock): string { + let blocks: Array; + + if (pod.kind === "poddocument") { + blocks = pod.blocks; + } else { + blocks = [pod]; + } + + // Reset state + this.#blockContentIter = this.#makeBlockContentIter(blocks); + this.#overBlockIndentLevels = []; + + // Need to wrap getNextBlockContent into closure here, + // otherwise we get an access violation + const markdownLines = this.#convertContentUntilDone( + () => this.#getNextBlockContent() + ); + + let finalLines: Array = []; + + for (const line of markdownLines) { + let processedLine = line; + + if (processedLine.trim() === "") { + processedLine = ""; + } + + finalLines.push(processedLine); + } + + if (finalLines.length === 0) { + return ""; + } + + return finalLines.join("\n").trimEnd() + "\n"; + } + + #convertContentUntilDone( + getNext: () => PodBlockContent | undefined, + ): Array { + let lines: Array = []; + + let content: PodBlockContent | undefined; + let previousContent: PodBlockContent | undefined; + + while (true) { + previousContent = content; + content = getNext(); + + if (!content) { + break; + } + + if (!previousContent) { + lines.push(...this.#convertBlockContent(content, getNext)); + continue; + } + + if (isOverBlockWithItem(content)) { + if (!isItem(previousContent)) { + ensureLastLineEmpty(lines); + lines.push(...this.#convertBlockContent(content, getNext)); + continue; + } + + lines.push(...this.#convertBlockContent(content, getNext)); + continue; + } + + // Consecutive list items are rendered without an empty line inbetween. + // Keeps the list visually coherent. 
+ if (!(isItem(content) && isItem(previousContent))) { + ensureLastLineEmpty(lines); + lines.push(...this.#convertBlockContent(content, getNext)); + continue; + } + + lines.push(...this.#convertBlockContent(content, getNext)); + } + + return lines; + } + + *#makeBlockContentIter(blocks: Array) { + for (const block of blocks) { + yield* block.paragraphs; + } + } + + #getNextBlockContent(): PodBlockContent | undefined { + let { value, done } = this.#blockContentIter.next(); + + if (done || value === undefined) { + return; + } + + return value; + } + + #convertBlockContent( + content: PodBlockContent, + getNext: () => PodBlockContent | undefined, + ): Array { + switch (content.kind) { + case "verbatim": + return this.#convertVerbatimPara(content); + case "ordinary": + return this.#convertOrdinaryPara(content); + case "head": + return this.#convertHeaderPara(content); + case "unordereditem": + case "ordereditem": + return this.#convertItemPara(content, getNext); + case "overblock": + return this.#convertOverBlock(content); + case "datablock": + return this.#convertDataBlock(content); + case "normaldatablock": + return this.#convertNormalDataBlock(content); + case "encoding": // ignored + case "unknown": // ignored + return []; + default: + const _exhaustiveCheck: never = content; + return _exhaustiveCheck; + } + } + + #convertVerbatimPara(verbatimPara: VerbatimParagraph): Array { + return [ + "```perl", + ...verbatimPara.lines.map((line) => tabsToSpaces(line, 2)), + "```", + ]; + } + + #convertOrdinaryPara(ordinaryPara: OrdinaryParagraph): Array { + return ordinaryPara.lines + .map((line) => tabsToSpaces(line, 2)) + .map(processInlineElements); + } + + #convertHeaderPara(headerPara: HeaderParagraph): Array { + // + 2 because we start at an h3 (###) for readability + const level = Math.min(headerPara.level + 2, 6); + + return [ + "#".repeat(level) + " " + processInlineElements(headerPara.contents) + ]; + } + + #convertItemPara( + itemPara: UnordererdItemParagraph | 
OrderedItemParagraph, + getNext: () => PodBlockContent | undefined, + ): Array { + let itemBeginning: string; + + if (itemPara.kind === "unordereditem") { + itemBeginning = "-"; + } else { + itemBeginning = `${itemPara.num}.`; + } + + const indentAndFormatList = (arr: Array): Array => { + if (arr.length === 0) { + return arr; + } + + let newArr: Array = []; + + newArr.push(itemBeginning + " " + arr[0]); + const indentLevel = itemBeginning.length + 1; + + for (const line of arr.slice(1)) { + newArr.push(" ".repeat(indentLevel) + line); + } + + return newArr; + }; + + if (itemPara.lines && itemPara.lines.length > 0) { + return indentAndFormatList(itemPara.lines.map(processInlineElements)); + } + + let nextContent = getNext(); + + if (!nextContent) { + return [itemBeginning]; + } + + if (nextContent.kind === "unordereditem" || nextContent.kind === "ordereditem") { + return [ + itemBeginning, + ...this.#convertItemPara(nextContent, getNext), + ]; + } + + return indentAndFormatList(this.#convertBlockContent(nextContent, getNext)); + } + + #convertOverBlock(block: OverBlock): Array { + const initialIndentLevel: number = this.#overBlockIndentLevels.reduce((a, b) => a + b, 0); + this.#overBlockIndentLevels.push(Math.round(block.level)); + + const indentList = (arr: Array): Array => { + let newArr: Array = []; + + let adjustedIndentLevel: number; + if (initialIndentLevel === 0) { + adjustedIndentLevel = 0; + } else { + adjustedIndentLevel = this.#overBlockIndentLevels + .reduce((a, b) => a + b, -initialIndentLevel); + } + + if (adjustedIndentLevel === 0) { + return arr; + } + + for (const line of arr) { + newArr.push(" ".repeat(adjustedIndentLevel) + line); + } + + return newArr; + } + + const getNext = makeOverBlockIterGetter(block); + + let lines: Array = this.#convertContentUntilDone(getNext); + + if (lines[0]?.trim() === "") { + lines.shift(); + } + + if (lines[lines.length - 1]?.trim() === "") { + lines.pop(); + } + + let result = indentList(lines); + 
this.#overBlockIndentLevels.pop(); + return result; + } + + #convertDataBlock(block: DataBlock): Array { + const getNext = makeDataBlockIterGetter(block); + + let dataStart: string; + let dataEnd: string; + + const formatname = block.formatname.trim(); + switch (formatname) { + case "code": + dataStart = "```perl"; + dataEnd = "```"; + break; + case "html": + dataStart = "```html"; + dataEnd = "```"; + break; + case "text": + dataStart = ""; + dataEnd = ""; + break; + default: + dataStart = ``; + dataEnd = `` + } + + let lines: Array = []; + let dataBlockPara: DataBlockContent | undefined; + + lines.push(dataStart); + + while (dataBlockPara = getNext()) { + switch (dataBlockPara.kind) { + case "data": + lines.push(...dataBlockPara.lines); + break; + case "datablock": + lines.push(dataEnd); + lines.push(...this.#convertDataBlock(dataBlockPara)); + lines.push(dataStart); + break; + case "normaldatablock": + lines.push(dataEnd); + lines.push(...this.#convertNormalDataBlock(dataBlockPara)); + lines.push(dataStart); + break; + default: + const _exhaustiveCheck: never = dataBlockPara; + return _exhaustiveCheck; + } + } + + lines.push(dataEnd); + + return lines; + } + + #convertNormalDataBlock(block: NormalDataBlock): Array { + const getNext = makeNormalDataBlockIterGetter(block); + + return this.#convertContentUntilDone(getNext); + } +} + +/** Appends an empty line if the last element in the list isn't an empty line already. 
*/ +function ensureLastLineEmpty(list: Array) { + if (list.at(-1)?.trim() !== "") { + list.push(""); + } +} + +function isItem(content: PodBlockContent): content is UnordererdItemParagraph | OrderedItemParagraph { + return ["unordereditem", "ordereditem"].includes(content.kind); +} + +function isOverBlockWithItem(content: PodBlockContent): content is OverBlock { + if (content.kind === "overblock") { + const firstBlockContent = content.paragraphs.at(0); + return firstBlockContent !== undefined && isItem(firstBlockContent); + } + + return false; +} - const absolutePath = await resolvePathForDoc(elem, perlDoc, modMap); +function tabsToSpaces(line: string, spacesPerTab: number = 4): string { + return line.replaceAll("\t", " ".repeat(spacesPerTab)); +} + +function makePodDocContentIterGetter(podDoc: PodDocument): () => PodBlockContent | undefined { + const podDocContentIter = function*(): Generator { + for (const block of podDoc.blocks) { + yield* block.paragraphs; + } + } - if(!absolutePath) return; + const iter = podDocContentIter(); - try { - var fileContent = await fs.promises.readFile(absolutePath, "utf8"); - } catch { - return; + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + return getNext; +} + +function makeOverBlockIterGetter(block: OverBlock): () => OverBlockContent | undefined { + const overBlockIter = function*(): Generator { + yield* block.paragraphs; } - // Initialize state variables - let inPodBlock = false; - let inRelevantBlock = true; - let podContent = ""; - let podBuffer = ""; // We "buffer" pod when searching to avoid empty sections - let meaningFullContent = false; - let searchItem; - if([PerlSymbolKind.Package, PerlSymbolKind.Module].includes(elem.type)){ - // Search all. 
Note I'm not really treating packages different from Modules - } else if([PerlSymbolKind.ImportedSub, PerlSymbolKind.Method, PerlSymbolKind.Inherited, PerlSymbolKind.PathedField, - PerlSymbolKind.LocalMethod, PerlSymbolKind.LocalSub].includes(elem.type)){ - searchItem = elem.name; - searchItem = searchItem.replace(/^[\w:]+::(\w+)$/, "$1"); // Remove package - } else { - return; + const iter = overBlockIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + return getNext; +} + +function makeDataBlockIterGetter(block: DataBlock): () => DataBlockContent | undefined { + const dataBlockIter = function*(): Generator { + yield* block.paragraphs; + } + + const iter = dataBlockIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + return getNext; +} + +function makeNormalDataBlockIterGetter(block: NormalDataBlock): () => PodBlockContent | undefined { + const normalDataBlockIter = function*(): Generator { + yield* block.paragraphs; } - let markdown = ""; + const iter = normalDataBlockIter(); + + const getNext = () => { + let { value, done } = iter.next(); + + if (done || value === undefined) { + return; + } + + return value; + }; + + return getNext; +} - // Quick search for leading comments of a very specific form with comment blocks the preceed a sub (and aren't simply get/set without docs) - // These regexes are painful, but I didn't want to mix this with the line-by-line POD parsing which would overcomplicate that piece +/** Quick search for leading comments of a very specific form with comment + * blocks that preceed a sub (and aren't simply get/set without docs). + * + * Separate function in order to avoid overcomplicating the line-by-line POD parsing. 
+ */ +function quickSearchByComment(symbolName: string, fileContent: string): string | undefined { let match, match2; - if(searchItem && (match = fileContent.match(`\\r?\\n#(?:####+| \-+) *(?:\\r?\\n# *)*${searchItem}\\r?\\n((?:(?:#.*| *)\\r?\\n)+)sub +${searchItem}\\b`))){ + + let markdown: string | undefined; + + if (match = fileContent.match(`\\r?\\n#(?:####+| \-+) *(?:\\r?\\n# *)*${symbolName}\\r?\\n((?:(?:#.*| *)\\r?\\n)+)sub +${symbolName}\\b`)) { // Ensure it's not an empty get/set pair. - if(!( (match2 = searchItem.match(/^get_(\w+)$/)) && match[1].match(new RegExp(`^(?:# +set_${match2[1]}\\r?\\n)?[\\s#]*$`)))){ + if ( + !( + (match2 = symbolName.match(/^get_(\w+)$/)) + && match[1].match(new RegExp(`^(?:# +set_${match2[1]}\\r?\\n)?[\\s#]*$`)) + ) + ) { let content = match[1].replace(/^ *#+ ?/gm,''); content = content.replace(/^\s+|\s+$/g,''); - if(content){ // It may still be empty for non-get functions - markdown += "```text\n" + content + "\n```\n" + + // May still be empty for non-get functions + if (content) { + markdown = "```text\n" + content + "\n```\n"; + } + } + } + + return markdown; +} + +/** Look up a symbol's name in a {@link PodDocument}. + * + * This searches the given POD doc for any `=item` or `=head\d` command paragraph + * that corresponds to the given `symbolName`. + * + * If the matched paragraph is a `=head\d`, returns all paragraphs starting from + * and including the matched header up until either a non-matching `=head\d` + * of the same level *or* a `=head\d` with a higher level is encountered. + * + * If the matched paragraph is an `=item`, returns all paragraphs starting from + * and including the matched item up until either a non-matching `=item` + * *or* the end of the `=item`'s `=over ... =back` block is reached (if any). 
+ */ +function lookupSymbolInPod(symbolName: string, podDoc: PodDocument): PodDocument | undefined { + const symbolRegex = new RegExp( + `(^\\s*(\\$.*->)?${symbolName}(\\(.*\\))?)|(X<${symbolName}>)|(X<<+\\s+${symbolName}\\s+>+>)` + ); + + let extractedContents = matchHeaderRegionInPod( + symbolRegex, + makePodDocContentIterGetter(podDoc), + ); + + if (extractedContents.length === 0) { + extractedContents = matchItemRegionInPod( + symbolRegex, + makePodDocContentIterGetter(podDoc), + ); + } + + if (extractedContents.length === 0) { + return; + } + + return { + kind: "poddocument", + blocks: [ + { + kind: "podblock", + paragraphs: extractedContents, + }, + ], + }; +} + +function matchHeaderRegionInPod(regex: RegExp, getNext: () => PodBlockContent | undefined): Array { + let currentContent: PodBlockContent | undefined; + let extractedContents: Array = []; + + const headerMatchesSymbol = (headerPara: HeaderParagraph) => headerPara.contents.match(regex); + + let foundHeader: HeaderParagraph | undefined; + + while (currentContent = getNext()) { + if (foundHeader) { + if (currentContent.kind === "head") { + // Next =headN command also matches regex, assume it's an alternative + // signature for the same symbol + if (currentContent.level === foundHeader.level && headerMatchesSymbol(currentContent)) { + extractedContents.push(currentContent); + continue; + } + + if (currentContent.level <= foundHeader.level) { + break; + } } + + extractedContents.push(currentContent); + continue; + } + + if (currentContent.kind === "head" && headerMatchesSymbol(currentContent)) { + foundHeader = currentContent; + extractedContents.push(currentContent); } } - // Split the file into lines and iterate through them - const lines = fileContent.split(/\r?\n/); - for (const line of lines) { - if (line.startsWith("=cut")) { - // =cut lines are not added. 
- inPodBlock = false; + return extractedContents; +} + +function matchItemRegionInPod(regex: RegExp, getNext: () => PodBlockContent | undefined): Array { + let currentContent: PodBlockContent | undefined; + let extractedContents: Array = []; + + const itemMatchesSymbol = (itemPara: UnordererdItemParagraph | OrderedItemParagraph) => { + if (itemPara.lines === undefined) { + return false; } - if (line.match(/^=(pod|head\d|over|item|back|begin|end|for|encoding)/)) { - inPodBlock = true; - meaningFullContent = false; - if(searchItem && line.match(new RegExp(`^=(head\\d|item).*\\b${searchItem}\\b`))){ - // This is structured so if we hit two relevant block in a row, we keep them both - inRelevantBlock = true; - } else { - inRelevantBlock = false; - podBuffer = ""; + for (const line of itemPara.lines) { + if (line.match(regex)) { + return true; + } + } + + return false; + } + + let foundItem: UnordererdItemParagraph | OrderedItemParagraph | undefined; + + while (currentContent = getNext()) { + if (foundItem) { + if (isItem(currentContent)) { + // Next =item command also matches regex, assume it's an alternative + // signature for the same symbol + if (itemMatchesSymbol(currentContent)) { + extractedContents.push(currentContent); + continue; + } + + break; } - } else if(line.match(/\w/)){ - // For this section, we found something that's not a header and has content - meaningFullContent = true; + + extractedContents.push(currentContent); + continue; } - if(inPodBlock){ - if(searchItem){ - if(inRelevantBlock) { - podBuffer += line + "\n"; + switch (currentContent.kind) { + case "unordereditem": + case "ordereditem": + if (itemMatchesSymbol(currentContent)) { + foundItem = currentContent; + extractedContents.push(currentContent); } + break; + case "overblock": + return matchItemRegionInPod(regex, makeOverBlockIterGetter(currentContent)); + case "normaldatablock": + return matchItemRegionInPod(regex, makeNormalDataBlockIterGetter(currentContent)); + } + } + + return 
extractedContents; +} + +function formatPodDocForSymbol(podDoc: PodDocument) { + // use this as default and let converter handle the adjustment + const symbolHeaderLevel: number = 1; + + // technically there should only be one block, but playing it safe here + for (const block of podDoc.blocks) { + block.paragraphs.forEach((para, index) => { + if (isItem(para)) { + const replacementHeaderPara: HeaderParagraph = { + kind: "head", + level: symbolHeaderLevel, + contents: (para.lines ?? []).join(" "), + lineNo: para.lineNo, + }; + + block.paragraphs[index] = replacementHeaderPara; } - else { - podContent += line + "\n"; + }); + } + + let highestHeaderLevel: number = 6; + + for (const block of podDoc.blocks) { + for (const para of block.paragraphs) { + if (para.kind === "head" && para.level < highestHeaderLevel) { + highestHeaderLevel = para.level; } } + } + + // normalize header levels to `symbolHeaderLevel` + for (const block of podDoc.blocks) { + for (const para of block.paragraphs) { + if (para.kind !== "head") { + continue; + } + + para.level = para.level - (highestHeaderLevel - symbolHeaderLevel); + }; + } +} + +function formatPodDoc(podDoc: PodDocument) { + for (const block of podDoc.blocks) { + block.paragraphs = block.paragraphs.filter((para) => { + return !( + para.kind === "head" + && para.level === 1 + && para.contents.trim() === "NAME" + ); + }); + } +} + +export async function getPod( + elem: PerlElem, + perlDoc: PerlDocument, + modMap: Map +): Promise { + let symbolName: string | undefined; + + switch (elem.type) { + case PerlSymbolKind.Module: + case PerlSymbolKind.Package: + break; + case PerlSymbolKind.ImportedSub: + case PerlSymbolKind.Inherited: + case PerlSymbolKind.PathedField: + case PerlSymbolKind.LocalSub: + case PerlSymbolKind.LocalMethod: + symbolName = elem.name.replace(/^[\w:]+::(\w+)$/, "$1"); // Remove package + break; + default: + return; + } + + // File may not exist - return nothing if it doesn't. 
+ const absolutePath = await resolvePathForDoc(elem, perlDoc, modMap); + + if (!absolutePath) { + return; + } + + let fileContents: string; - if(meaningFullContent && podBuffer != ""){ - podContent += podBuffer; - podBuffer = ""; + try { + fileContents = await fs.promises.readFile(absolutePath, "utf8"); + } catch { + return; + } + + if (symbolName) { + let quickSearchMarkdown = quickSearchByComment(symbolName, fileContents); + if (quickSearchMarkdown) { + return quickSearchMarkdown; } } - - markdown += convertPODToMarkdown(podContent); + + let parser = new RawPodParser(); + let rawPodDocResult = parser.parse(fileContents); + + let processor = new PodProcessor(); + let podDoc = processor.process(rawPodDocResult); + + let podDocRes: PodDocument | undefined = podDoc; + + if (symbolName) { + podDocRes = lookupSymbolInPod(symbolName, podDoc); + + if (podDocRes) { + formatPodDocForSymbol(podDocRes); + } + } + + if (!podDocRes) { + return; + } + + formatPodDoc(podDocRes); + + let converter = new PodToMarkdownConverter(); + let markdown = converter.convert(podDocRes); + + if (!markdown) { + return; + } return markdown; } - async function resolvePathForDoc(elem: PerlElem, perlDoc: PerlDocument, modMap: Map): Promise { let absolutePath = Uri.parse(elem.uri).fsPath; @@ -140,13 +1986,13 @@ async function fsPathOrAlt(fsPath: string | undefined): Promise { return false; } -type ConversionState = { - inList: boolean; - inVerbatim: boolean; - inCustomBlock: boolean; - markdown: string; - encoding: string | null; // Currently processed, but not used - waitingForListTitle: boolean; -}; - -const convertPODToMarkdown = (pod: string): string => { - let finalMarkdown: string = ""; - let state: ConversionState = { - inList: false, - inVerbatim: false, - inCustomBlock: false, - markdown: "", - encoding: null, - waitingForListTitle: false, - }; - - const lines = pod.split("\n"); - - for (let i = 0; i < lines.length; i++) { - let line = lines[i]; - - // Check for verbatim blocks first, 
perhaps ending a prior one - if (shouldConsiderVerbatim(line) || state.inVerbatim) { - state = processVerbatim(line, state); - finalMarkdown += state.markdown; - if (state.inVerbatim) { - // Don't need to keep going if we're still in verbatim mode - continue; - } - } - - // Inline transformations for code, bold, etc. - line = processInlineElements(line); - - // Handling =pod to start documentation - if (line.startsWith("=pod")) { - continue; // Generally, we just skip this. - } - // Headings - else if (line.startsWith("=head")) { - const output = processHeadings(line); - - if(/\w/.test(finalMarkdown) || !/^\n##+ NAME\n$/.test(output)){ - // I find it a waste of space to include the headline "NAME". We're short on space in the hover - finalMarkdown += output; - } - } - // List markers and items - else if (line.startsWith("=over") || line.startsWith("=item") || line.startsWith("=back") || state.waitingForListTitle) { - state = processList(line, state); - finalMarkdown += state.markdown; - } - // Custom blocks like =begin and =end - else if (line.startsWith("=begin") || line.startsWith("=end")) { - state = processCustomBlock(line, state); - finalMarkdown += state.markdown; - } - // Format-specific blocks like =for - else if (line.startsWith("=for")) { - finalMarkdown += processFormatSpecificBlock(line); - } - // Encoding - else if (line.startsWith("=encoding")) { - state = processEncoding(line, state); - } - - else if(state.inList){ - if(line){ - finalMarkdown += ` ${line} `; - } - } - // Generic text - else { - finalMarkdown += `${line}\n`; - } - } - - return finalMarkdown; -}; - -const processHeadings = (line: string): string => { - // Extract the heading level from the line. This will be a number from 1-6. - let level = parseInt(line.slice(5, 6)); - level = Math.min(level, 3); // Maximum 6 indentation levels in Markdown - // Ensure that the heading level is valid. 
- if (isNaN(level) || level < 1 || level > 6) { - return ""; - } - - // Extract the actual text of the heading, which follows the =head command. - const text = line.slice(7).trim(); - - // Convert the heading to its Markdown equivalent. I marked head1 -> ### because I prefer the compact form. - const markdownHeading = `\n##${"#".repeat(level)} ${text}\n`; - - return markdownHeading; -}; - -const processList = (line: string, state: ConversionState): ConversionState => { - let markdown: string = ""; - - // The =over command starts a list. - if (line.startsWith("=over")) { - state.inList = true; - markdown = "\n"; - } - - // The =item command denotes a list item. - else if (/^=item \*\s*$/.test(line)) { - state.waitingForListTitle= true; - markdown = ""; - } else if (state.waitingForListTitle && /[^\s]/.test(line)) { - state.waitingForListTitle = false; - markdown = `\n- ${line} \n `; - } - - // The =item command denotes a list item. - else if (line.startsWith("=item")) { - state.inList = true; - - // Remove the '=item' part to get the actual text for the list item. - let listItem = line.substring(6).trim(); - if (listItem.startsWith("* ")) // Doubled up list identifiers - listItem = listItem.replace("*", ""); - markdown = `\n- ${listItem} \n `; // Unordered list - } - // The =back command ends the list. 
- else if (line.startsWith("=back")) { - state.inList = false; - markdown = "\n"; - } - - return { - ...state, - markdown, - }; -}; - -const processCustomBlock = (line: string, state: ConversionState): ConversionState => { - let markdown = ""; - - // =begin starts a custom block - if (line.startsWith("=begin")) { - // Extract the format following =begin - const format = line.slice(7).trim(); - state.inCustomBlock = true; - - // Choose Markdown representation based on the format - switch (format) { - case "code": - markdown = "```perl\n"; - break; - // Add cases for other formats as needed - default: - markdown = `\n`; - break; - } - } - // =end ends the custom block - else if (line.startsWith("=end")) { - // Extract the format following =end - const format = line.slice(5).trim(); - state.inCustomBlock = false; - - // Close the Markdown representation - switch (format) { - case "code": - markdown = "```\n"; - break; - // Add cases for other formats as needed - default: - markdown = `\n`; - break; - } - } - - return { - ...state, - markdown, - }; -}; - -const processFormatSpecificBlock = (line: string): string => { - // The `=for` command itself is followed by the format and then the text. - const parts = line.split(" ").slice(1); - - if (parts.length < 2) { - return ""; - } - - // Extract the format and the actual text. - const format = parts[0].trim(); - const text = parts.slice(1).join(" ").trim(); - - // Choose the Markdown representation based on the format. - let markdown = ""; - switch (format) { - case "text": - // Plain text, just add it. - markdown = `${text}\n`; - break; - case "html": - // If it's HTML, encapsulate it within comments for safety. - markdown = `\n`; - break; - // Add more cases as you find the need for other specific formats. - default: - // For unsupported or custom formats, wrap it in a comment. 
- markdown = `\n`; - break; - } - - return markdown; -}; - // Mapping backticks to the Unicode non-character U+FFFF which is not allowed to appear in text const tempPlaceholder = '\uFFFF'; @@ -403,8 +2030,7 @@ const processInlineElements = (line: string): string => { line = line.replace(/C<((?:[^<>]|[EL]<[^<>]+>)+?)>/g, (match, code) => escapeBackticks(code)); // Unfortunately doesn't require the <<< to be matched in quantity. E<> is allowed automatically - line = line.replace(/C<< (.+?) >>/g, (match, code) => escapeBackticks(code)); - line = line.replace(/C<<<+ (.+?) >+>>/g, (match, code) => escapeBackticks(code)); + line = line.replace(/C<<+\s+(.+?)\s+>+>/g, (match, code) => escapeBackticks(code)); // Handle special characters (E) line = line.replace(/E<([^>]+)>/g, (match, entity) => convertE(entity)); @@ -412,77 +2038,47 @@ const processInlineElements = (line: string): string => { // Mapping the Unicode non-character U+FFFF back to escaped backticks line = line.replace(new RegExp(tempPlaceholder, 'g'), '\\`'); + // Handle bold italic (B>) + line = line.replace(/B]+)>>/g, "***$1***"); + line = line.replace(/B>+>/g, "***$1***"); + line = line.replace(/B<<+\s+I<([^<>]+)>\s+>+>/g, "***$1***"); + line = line.replace(/B<<+\s+I<<+\s+(.+?)\s+>+>\s+>+>/g, "***$1***"); + + // Handle italic bold (B>) + line = line.replace(/I]+)>>/g, "***$1***"); + line = line.replace(/I>+>/g, "***$1***"); + line = line.replace(/I<<+\s+B<([^<>]+)>\s+>+>/g, "***$1***"); + line = line.replace(/I<<+\s+B<<+\s+(.+?)\s+>+>\s+>+>/g, "***$1***"); + // Handle bold (B) line = line.replace(/B<([^<>]+)>/g, "**$1**"); - line = line.replace(/B<< (.+?) >>/g, "**$1**"); + line = line.replace(/B<<+\s+(.+?)\s+>+>/g, "**$1**"); // Handle italics (I) line = line.replace(/I<([^<>]+)>/g, "*$1*"); - line = line.replace(/I<< (.+?) 
>>/g, "*$1*"); + line = line.replace(/I<<+\s+(.+?)\s+>+>/g, "*$1*"); // Handle links (L), URLS auto-link in vscode's markdown line = line.replace(/L<(http[^>]+)>/g, " $1 "); line = line.replace(/L<([^<>]+)>/g, "`$1`"); - line = line.replace(/L<< (.*?) >>/g, "`$1`"); + line = line.replace(/L<<+\s+(.*?)\s+>+>/g, "`$1`"); // Handle non-breaking spaces (S) line = line.replace(/S<([^<>]+)>/g, "$1"); + line = line.replace(/S<<+\s+(.+?)\s+>+>/g, "$1"); // Handle file names (F), converting to italics line = line.replace(/F<([^<>]+)>/g, "*$1*"); + line = line.replace(/F<<+\s+(.+?)\s+>+>/g, "*$1*"); // Handle index entries (X), ignoring as Markdown doesn't have an index line = line.replace(/X<([^<>]+)>/g, ""); - - // Escape HTML entities last since we use them above - line = escapeHTML(line); + line = line.replace(/X<<+\s+(.+?)\s+>+>/g, "$1"); return line; }; - -function escapeRegExp(str: string): string { - return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string - } - - - -const escapeHTML = (str: string): string => { - const map: { [key: string]: string } = { - "&": "&", - "<": "<", - ">": ">", - '"': """, - "'": "'", - "\\\\": "\\", // Two backslashes become one - - // These are required for the regex to consume & to ensure they don't get mapped to amp style. 
- "\\&": "\\&", - "\\<": "\\<", - '\\"': '\\"', - "\\'": "\\'", - }; - - // If the number of backticks is odd, it means backticks are unbalanced - const backtickCount = (str.match(/`/g) || []).length; - const segments = str.split("`"); - - if (backtickCount % 2 !== 0 || segments.length % 2 === 0) { - // Handle the unbalanced backticks here - str = str.replaceAll("`", ""); - } - - // Escape special characters and create a regex pattern - const pattern = new RegExp( Object.keys(map).map(escapeRegExp).join('|'), 'g' ); - - for (let i = 0; i < segments.length; i += 2) { - segments[i] = segments[i].replace(pattern, (m) => map[m]); - } - - return segments.join("`"); -}; - const escapeBackticks = (str: string): string => { let count = (str.match(new RegExp(tempPlaceholder, 'g')) || []).length; str = str.replace(new RegExp(tempPlaceholder, 'g'), '`'); // Backticks inside don't need to be escaped. @@ -513,54 +2109,3 @@ const convertE = (content: string): string => { } } }; - -// Determine if the line should start a verbatim text block -const shouldConsiderVerbatim = (line: string): boolean => { - // A verbatim block starts with a whitespace but isn't part of a list - return /^\s+/.test(line); -}; - -// Process verbatim text blocks -const processVerbatim = (line: string, state: ConversionState): ConversionState => { - let markdown = ""; - if (/^\s+/.test(line)) { - // If this is the start of a new verbatim block, add Markdown code fence - if (!state.inVerbatim) { - markdown += "\n```\n"; - } - state.inVerbatim = true; - - // Trim some starting whitespace and add the line to the block - // Most pod code has 4 spaces or a tab, but I find 2 space indents most readable in the space constrained pop-up - markdown += line.replace(/^(?:\s{4}|\t)/, " ") + "\n"; - } - // } else if(/^\s+/.test(line)){ - // // Verbatim blocks in lists are tricky. 
Let's just do one line at a time for now so we don't need to keep track of indentation - // markdown = "```\n" + line + "```\n"; - // state.isLineVerbatim = true; - // } - else if (state.inVerbatim) { - // This line ends the verbatim block - state.inVerbatim = false; - markdown += "```\n"; // End the Markdown code fence - } - - return { - ...state, - markdown, - }; -}; - -const processEncoding = (line: string, state: ConversionState): ConversionState => { - // Extract the encoding type from the line - const encodingType = line.split(" ")[1]?.trim(); - - if (encodingType) { - return { - ...state, - encoding: encodingType, - }; - } - - return state; -};