From e93e8a7e97e87e544ae5ed36fd7015fa174939a7 Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Fri, 15 Sep 2023 11:36:11 -0600 Subject: [PATCH 01/10] update dependencies --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 5713f00..a367e91 100644 --- a/setup.py +++ b/setup.py @@ -21,10 +21,10 @@ ], keywords="markdown confluence", install_requires=[ - "mistune==0.8.4", + "mistune==3.0.1", "tortilla==0.5.0", - "PyYAML==6.0", - "gitignore_parser==0.0.8", + "PyYAML==6.0.1", + "gitignore_parser==0.1.6", ], python_requires=">=3.6", entry_points={"console_scripts": ["md2cf=md2cf.__main__:main"]}, From c3acd0d10caca4397f3936debf48a1f17fa2b6e0 Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Fri, 15 Sep 2023 11:37:26 -0600 Subject: [PATCH 02/10] Update mistune related changes --- md2cf/confluence_renderer.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py index 44be072..0f61484 100644 --- a/md2cf/confluence_renderer.py +++ b/md2cf/confluence_renderer.py @@ -49,8 +49,10 @@ def append(self, child): self.children.append(child) -class ConfluenceRenderer(mistune.Renderer): - def __init__(self, strip_header=False, remove_text_newlines=False, **kwargs): +class ConfluenceRenderer(mistune.HTMLRenderer): + def __init__(self, strip_header=False, + remove_text_newlines=False, + **kwargs): super().__init__(**kwargs) self.strip_header = strip_header self.remove_text_newlines = remove_text_newlines @@ -61,14 +63,14 @@ def reinit(self): self.attachments = list() self.title = None - def header(self, text, level, raw=None): + def heading(self, text, level, raw=None): if self.title is None and level == 1: self.title = text # Don't duplicate page title as a header if self.strip_header: return "" - return super(ConfluenceRenderer, self).header(text, level, raw=raw) + return super(ConfluenceRenderer, self).heading(text, level, raw=raw) def 
structured_macro(self, name): return ConfluenceTag("structured-macro", attrib={"name": name}) From 5d4bc64eb711b2135bd55fe5c03bf8878f34174c Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Fri, 15 Sep 2023 11:37:41 -0600 Subject: [PATCH 03/10] Return text instead of rendering text This is needed so the actual renderer can escape (or not) the text value, depending on the escape parameter on the renderer --- md2cf/confluence_renderer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/md2cf/confluence_renderer.py b/md2cf/confluence_renderer.py index 0f61484..b0bdb3c 100644 --- a/md2cf/confluence_renderer.py +++ b/md2cf/confluence_renderer.py @@ -89,7 +89,7 @@ def text(self, text): if self.remove_text_newlines: text = text.replace("\n", " ") - return super().text(text) + return text def block_code(self, code, lang=None): root_element = self.structured_macro("code") From 1530f0d53131ebcbd5b0ad65c31e513c79ccbe5f Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Fri, 15 Sep 2023 11:40:46 -0600 Subject: [PATCH 04/10] Add renderer arguments escape and allow_harmful_protocols These are needed by mistune renderer --- md2cf/document.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/md2cf/document.py b/md2cf/document.py index 4419852..77b4304 100644 --- a/md2cf/document.py +++ b/md2cf/document.py @@ -180,7 +180,9 @@ def get_pages_from_directory( def get_page_data_from_file_path( - file_path: Path, strip_header: bool = False, remove_text_newlines: bool = False + file_path: Path, + strip_header: bool = False, + remove_text_newlines: bool = False, ) -> Page: if not isinstance(file_path, Path): file_path = Path(file_path) @@ -234,17 +236,22 @@ def parse_page( markdown_lines: List[str], strip_header: bool = False, remove_text_newlines: bool = False, + escape: bool = False, + allow_harmful_protocols: bool = False ) -> Page: renderer = ConfluenceRenderer( - use_xhtml=True, strip_header=strip_header, 
remove_text_newlines=remove_text_newlines, + escape=escape, + allow_harmful_protocols=allow_harmful_protocols ) confluence_mistune = mistune.Markdown(renderer=renderer) confluence_content = confluence_mistune("".join(markdown_lines)) page = Page( - title=renderer.title, body=confluence_content, attachments=renderer.attachments + title=renderer.title, + body=confluence_content, + attachments=renderer.attachments ) return page From b115acccbe7cd1024c22027878eb2cf576d0c2b7 Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Fri, 15 Sep 2023 11:42:19 -0600 Subject: [PATCH 05/10] Update render test to use mistune correct heading function --- tests/unit/test_renderer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/unit/test_renderer.py b/tests/unit/test_renderer.py index 8f83234..06eb7b4 100644 --- a/tests/unit/test_renderer.py +++ b/tests/unit/test_renderer.py @@ -99,7 +99,7 @@ def test_tag_render_with_child_and_text(): def test_renderer_reinit(): renderer = ConfluenceRenderer() - renderer.header("this is a title", 1) + renderer.heading("this is a title", 1) assert renderer.title is not None renderer.reinit() @@ -138,7 +138,7 @@ def test_renderer_header_sets_title(): test_header = "this is a header" renderer = ConfluenceRenderer() - renderer.header(test_header, 1) + renderer.heading(test_header, 1) assert renderer.title == test_header @@ -147,7 +147,7 @@ def test_renderer_strips_header(): test_header = "this is a header" renderer = ConfluenceRenderer(strip_header=True) - result = renderer.header(test_header, 1) + result = renderer.heading(test_header, 1) assert result == "" @@ -156,7 +156,7 @@ def test_renderer_header_lower_level_does_not_set_title(): test_header = "this is a header" renderer = ConfluenceRenderer() - renderer.header(test_header, 2) + renderer.heading(test_header, 2) assert renderer.title is None @@ -166,8 +166,8 @@ def test_renderer_header_later_level_sets_title(): test_header = "this is a header" renderer = 
ConfluenceRenderer() - renderer.header(test_lower_header, 2) - renderer.header(test_header, 1) + renderer.heading(test_lower_header, 2) + renderer.heading(test_header, 1) assert renderer.title is test_header @@ -177,8 +177,8 @@ def test_renderer_header_only_sets_first_title(): test_second_header = "this is another header" renderer = ConfluenceRenderer() - renderer.header(test_header, 1) - renderer.header(test_second_header, 1) + renderer.heading(test_header, 1) + renderer.heading(test_second_header, 1) assert renderer.title is test_header From 42d29a6be29b19637eab911c5997afe5161438a6 Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Fri, 15 Sep 2023 11:42:39 -0600 Subject: [PATCH 06/10] update test.md and result.xml --- tests/functional/result.xml | 69 +++++++++++++++++-------------------- tests/functional/test.md | 39 +++++++-------------- 2 files changed, 44 insertions(+), 64 deletions(-) diff --git a/tests/functional/result.xml b/tests/functional/result.xml index ca22230..ded83c6 100644 --- a/tests/functional/result.xml +++ b/tests/functional/result.xml @@ -1,27 +1,27 @@

Markdown: Syntax

    +
  • Markdown: Syntax
    • Overview
    • -
    • Block Elements

      Note: This document is itself written using Markdown; you can see the source for it by adding '.text' to the URL.


      @@ -60,7 +60,8 @@ determines the header level.)

      familiar with quoting passages of text in an email message, then you know how to create a blockquote in Markdown. It looks best if you hard wrap the text and put a > before every line:

      -

      This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +

      +

      This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

      Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse @@ -68,22 +69,28 @@ id sem consectetuer libero luctus adipiscing.

      Markdown allows you to be lazy and only put the > before the first line of a hard-wrapped paragraph:

      -

      This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +

      +

      This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

      +
      +

      Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing.

      Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by adding additional levels of >:

      -

      This is the first level of quoting.

      -

      This is nested blockquote.

      +
      +

      This is the first level of quoting.

      +
      +

      This is nested blockquote.

      Back to the first level.

      Blockquotes can contain other Markdown elements, including headers, lists, and code blocks:

      -

      This is a header.

      +
      +

      This is a header.

      1. This is the first list item.
      2. This is the second list item.
      3. @@ -132,16 +139,6 @@ Markdown produces from the above list is:

      4. McHale
      5. Parish
      -

      or even:

      -
        -
      1. Bird
      2. -
      3. McHale
      4. -
      5. Parish
      6. -
      -

      you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to.

      To make lists look nice, you can wrap items with hanging indents:

      • Lorem ipsum dolor sit amet, consectetuer adipiscing elit. @@ -188,7 +185,8 @@ sit amet, consectetuer adipiscing elit.

        delimiters need to be indented:

        • A list item with a blockquote:

          -

          This is a blockquote +

          +

          This is a blockquote inside a list item.

        • @@ -211,17 +209,13 @@ in both <pre> and <code> tags.

          block by at least 4 spaces or 1 tab.

          This is a normal paragraph:

          true - +

          Here is an example of AppleScript:

          true +end tell]]>

          A code block continues until it reaches a line that is not indented (or the end of the article).

          @@ -233,9 +227,7 @@ ampersands and angle brackets. For example, this:

          true © 2004 Foo Corporation - - -]]> +]]>

          Regular Markdown syntax is not processed within code blocks. E.g., asterisks are just literal asterisks within a code block. This means @@ -243,7 +235,8 @@ it's also easy to use Markdown to write about Markdown's own syntax.

          true +end tell +]]>

          Span Elements

          Links

          diff --git a/tests/functional/test.md b/tests/functional/test.md index 8aaab12..bade426 100644 --- a/tests/functional/test.md +++ b/tests/functional/test.md @@ -1,20 +1,18 @@ # Markdown: Syntax -* [Overview](#overview) - * [Philosophy](#philosophy) - * [Inline HTML](#html) - * [Automatic Escaping for Special Characters](#autoescape) -* [Block Elements](#block) - * [Paragraphs and Line Breaks](#p) - * [Headers](#header) - * [Blockquotes](#blockquote) - * [Lists](#list) - * [Code Blocks](#precode) - * [Horizontal Rules](#hr) -* [Span Elements](#span) - * [Links](#link) - * [Emphasis](#em) - * [Code](#code) +- [Markdown: Syntax](#markdown-syntax) + - [Overview](#overview) + - [Philosophy](#philosophy) + - [Block Elements](#block-elements) + - [Paragraphs and Line Breaks](#paragraphs-and-line-breaks) + - [Headers](#headers) + - [Blockquotes](#blockquotes) + - [Lists](#lists) + - [Code Blocks](#code-blocks) + - [Span Elements](#span-elements) + - [Links](#links) + - [Emphasis](#emphasis) + - [Code](#code) **Note:** This document is itself written using Markdown; you @@ -154,17 +152,6 @@ If you instead wrote the list in Markdown like this: 1. McHale 1. Parish -or even: - -3. Bird -1. McHale -8. Parish - -you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to. - To make lists look nice, you can wrap items with hanging indents: * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. 
From e0cc032f0666dd761ae94b650c1abfeeb3cc927e Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Fri, 15 Sep 2023 12:35:07 -0600 Subject: [PATCH 07/10] update test.md and result.xml to match new Mistune --- test_package/functional/result.xml | 69 ++++++++++++++---------------- test_package/functional/test.md | 39 ++++++----------- 2 files changed, 44 insertions(+), 64 deletions(-) diff --git a/test_package/functional/result.xml b/test_package/functional/result.xml index ca22230..ded83c6 100644 --- a/test_package/functional/result.xml +++ b/test_package/functional/result.xml @@ -1,27 +1,27 @@

          Markdown: Syntax

            +
          • Markdown: Syntax
            • Overview
            • -
            • Block Elements

              Note: This document is itself written using Markdown; you can see the source for it by adding '.text' to the URL.


              @@ -60,7 +60,8 @@ determines the header level.)

              familiar with quoting passages of text in an email message, then you know how to create a blockquote in Markdown. It looks best if you hard wrap the text and put a > before every line:

              -

              This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +

              +

              This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

              Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse @@ -68,22 +69,28 @@ id sem consectetuer libero luctus adipiscing.

              Markdown allows you to be lazy and only put the > before the first line of a hard-wrapped paragraph:

              -

              This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +

              +

              This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

              +
              +

              Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse id sem consectetuer libero luctus adipiscing.

              Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by adding additional levels of >:

              -

              This is the first level of quoting.

              -

              This is nested blockquote.

              +
              +

              This is the first level of quoting.

              +
              +

              This is nested blockquote.

              Back to the first level.

              Blockquotes can contain other Markdown elements, including headers, lists, and code blocks:

              -

              This is a header.

              +
              +

              This is a header.

              1. This is the first list item.
              2. This is the second list item.
              3. @@ -132,16 +139,6 @@ Markdown produces from the above list is:

              4. McHale
              5. Parish
              -

              or even:

              -
                -
              1. Bird
              2. -
              3. McHale
              4. -
              5. Parish
              6. -
              -

              you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to.

              To make lists look nice, you can wrap items with hanging indents:

              • Lorem ipsum dolor sit amet, consectetuer adipiscing elit. @@ -188,7 +185,8 @@ sit amet, consectetuer adipiscing elit.

                delimiters need to be indented:

                • A list item with a blockquote:

                  -

                  This is a blockquote +

                  +

                  This is a blockquote inside a list item.

                • @@ -211,17 +209,13 @@ in both <pre> and <code> tags.

                  block by at least 4 spaces or 1 tab.

                  This is a normal paragraph:

                  true - +

                  Here is an example of AppleScript:

                  true +end tell]]>

                  A code block continues until it reaches a line that is not indented (or the end of the article).

                  @@ -233,9 +227,7 @@ ampersands and angle brackets. For example, this:

                  true © 2004 Foo Corporation - - -]]> +]]>

                  Regular Markdown syntax is not processed within code blocks. E.g., asterisks are just literal asterisks within a code block. This means @@ -243,7 +235,8 @@ it's also easy to use Markdown to write about Markdown's own syntax.

                  true +end tell +]]>

                  Span Elements

                  Links

                  diff --git a/test_package/functional/test.md b/test_package/functional/test.md index 8aaab12..bade426 100644 --- a/test_package/functional/test.md +++ b/test_package/functional/test.md @@ -1,20 +1,18 @@ # Markdown: Syntax -* [Overview](#overview) - * [Philosophy](#philosophy) - * [Inline HTML](#html) - * [Automatic Escaping for Special Characters](#autoescape) -* [Block Elements](#block) - * [Paragraphs and Line Breaks](#p) - * [Headers](#header) - * [Blockquotes](#blockquote) - * [Lists](#list) - * [Code Blocks](#precode) - * [Horizontal Rules](#hr) -* [Span Elements](#span) - * [Links](#link) - * [Emphasis](#em) - * [Code](#code) +- [Markdown: Syntax](#markdown-syntax) + - [Overview](#overview) + - [Philosophy](#philosophy) + - [Block Elements](#block-elements) + - [Paragraphs and Line Breaks](#paragraphs-and-line-breaks) + - [Headers](#headers) + - [Blockquotes](#blockquotes) + - [Lists](#lists) + - [Code Blocks](#code-blocks) + - [Span Elements](#span-elements) + - [Links](#links) + - [Emphasis](#emphasis) + - [Code](#code) **Note:** This document is itself written using Markdown; you @@ -154,17 +152,6 @@ If you instead wrote the list in Markdown like this: 1. McHale 1. Parish -or even: - -3. Bird -1. McHale -8. Parish - -you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to. - To make lists look nice, you can wrap items with hanging indents: * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. 
From 7ae84b72e3387945e6ca6cb5bdb183a99731aff5 Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Fri, 15 Sep 2023 12:44:02 -0600 Subject: [PATCH 08/10] listen to the linter --- md2cf/document.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/md2cf/document.py b/md2cf/document.py index 41a13c8..3103dbb 100644 --- a/md2cf/document.py +++ b/md2cf/document.py @@ -275,14 +275,14 @@ def parse_page( remove_text_newlines: bool = False, enable_relative_links: bool = False, escape: bool = False, - allow_harmful_protocols: bool = False + allow_harmful_protocols: bool = False, ) -> Page: renderer = ConfluenceRenderer( strip_header=strip_header, remove_text_newlines=remove_text_newlines, enable_relative_links=enable_relative_links, escape=escape, - allow_harmful_protocols=allow_harmful_protocols + allow_harmful_protocols=allow_harmful_protocols, ) confluence_mistune = mistune.Markdown(renderer=renderer) confluence_content = confluence_mistune("".join(markdown_lines)) From f85d20f8fa189ba0e2e59d9cc13b6b8755e7c6a7 Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Sun, 22 Oct 2023 10:39:19 -0600 Subject: [PATCH 09/10] fix dependencies remove unneeded packages as of the latest md2cf version --- setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 2a508e1..b7e5656 100644 --- a/setup.py +++ b/setup.py @@ -23,13 +23,11 @@ install_requires=[ "rich-argparse==1.0.0", "rich==13.0.1", + "mistune==3.0.1", "chardet==5.1.0", "requests==2.31.0", "PyYAML==6.0.1", "gitignorefile==1.1.2", - "mistune==3.0.1", - "tortilla==0.5.0", - "gitignore_parser==0.1.6", ], python_requires=">=3.7", entry_points={"console_scripts": ["md2cf=md2cf.__main__:main"]}, From ef40183ed156d41231d54f209380b97499175ee3 Mon Sep 17 00:00:00 2001 From: Edmundo Sanchez Date: Sun, 22 Oct 2023 10:51:17 -0600 Subject: [PATCH 10/10] keep the ordered list that starts at #3 in the test --- test_package/functional/result.xml | 10 ++++++++++ 
test_package/functional/test.md | 11 +++++++++++ 2 files changed, 21 insertions(+) diff --git a/test_package/functional/result.xml b/test_package/functional/result.xml index ded83c6..4978a28 100644 --- a/test_package/functional/result.xml +++ b/test_package/functional/result.xml @@ -139,6 +139,16 @@ Markdown produces from the above list is:

                • McHale
                • Parish
                • +

                  or even:

                  +
                    +
                  1. Bird
                  2. +
                  3. McHale
                  4. +
                  5. Parish
                  6. +
                  +

                  you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to.

                  To make lists look nice, you can wrap items with hanging indents:

                  • Lorem ipsum dolor sit amet, consectetuer adipiscing elit. diff --git a/test_package/functional/test.md b/test_package/functional/test.md index bade426..9f85ecb 100644 --- a/test_package/functional/test.md +++ b/test_package/functional/test.md @@ -152,6 +152,17 @@ If you instead wrote the list in Markdown like this: 1. McHale 1. Parish +or even: + +3. Bird +1. McHale +8. Parish + +you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to. + To make lists look nice, you can wrap items with hanging indents: * Lorem ipsum dolor sit amet, consectetuer adipiscing elit.