From a411b746cbb89a7fafb9ec92812234c7cdbcfb3d Mon Sep 17 00:00:00 2001 From: V360 Date: Tue, 3 Sep 2024 21:17:17 -0400 Subject: [PATCH 1/5] make indents work with tabs, other whitespace chrs --- parse.js | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/parse.js b/parse.js index 195d9e2..7c0c1b2 100644 --- a/parse.js +++ b/parse.js @@ -12,6 +12,7 @@ class Markup_12y2 { constructor() { '{EOL}': "(?![^\\n])", '{BOL}': "^", '{ANY}': "[^]", + '{LINE_WS}': "[^\n\S]", '{URL_CHARS}': "[-\\w/%&=#+~@$*'!?,.;:]*", '{URL_FINAL}': "[-\\w/%&=#+~@$*']", } @@ -42,9 +43,9 @@ class Markup_12y2 { constructor() { `[\`][^\`\n]*([\`]{2}[^\`\n]*)*[\`]?${'INLINE_CODE'}` `([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)?${'LINK'}` `{BOL}[|][-][-+]*[-][|]{EOL}${'TABLE_DIVIDER'}` // `{BOL}[|][|][|]{EOL}${'TABLE_DIVIDER'}` - `{BOL} *[|]${'TABLE_START'}` - ` *[|][|]?${'TABLE_CELL'}` - `{BOL} *[-]${'LIST_ITEM'}` + `{BOL}{LINE_WS}*[|]${'TABLE_START'}` + `{LINE_WS}*[|][|]?${'TABLE_CELL'}` + `{BOL}{LINE_WS}*[-]${'LIST_ITEM'}` () //todo: org tables separators? @@ -313,7 +314,7 @@ class Markup_12y2 { constructor() { } const ARG_REGEX = /.*?(?=])/y const WORD_REGEX = /[^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*/y - const CODE_REGEX = /(?: *([-\w.+#$ ]+?) *(?![^\n]))?\n?([^]*?)(?:\n?```|$)/y // ack + const CODE_REGEX = /(?:[^\n\S]*([-\w.+#$ ]+?)[^\n\S]*(?![^\n]))?\n?([^]*?)(?:\n?```|$)/y // ack const parse=(text)=>{ let tree = {type: 'ROOT', content: [], prev: 'all_newline'} @@ -323,7 +324,8 @@ class Markup_12y2 { constructor() { // these use REGEX, text const skip_spaces=()=>{ let pos = REGEX.lastIndex - while (" "===text.charAt(pos)) + // FIXME: should be regex-based to catch all types of whitespace whatever they may be + while (" "===text.charAt(pos) || "\t"===text.charAt(pos)) pos++ REGEX.lastIndex = pos } @@ -528,7 +530,7 @@ class Markup_12y2 { constructor() { CLOSE(true) CLOSE() // cell // TODO: HACK - if (/^ *[|][|]/.test(token)) { + if (/^[^\n\S]*[|][|]/.test(token)) { let last = current.content[current.content.length-1] last.args.div = true } From 3d9ff7a489aa81ec38ed6b7c2fb137dde092cbf2 Mon Sep 17 00:00:00 2001 From: V360 Date: Wed, 4 Sep 2024 14:17:39 -0400 Subject: [PATCH 2/5] use given "horizontal whitespace" regex instead of `[^\S\n]`, which includes many vertical whitespace characters, use the given horizontal whitespace regex. thanks y! --- parse.js | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/parse.js b/parse.js index 7c0c1b2..f5ebeb5 100644 --- a/parse.js +++ b/parse.js @@ -12,7 +12,7 @@ class Markup_12y2 { constructor() { '{EOL}': "(?![^\\n])", '{BOL}': "^", '{ANY}': "[^]", - '{LINE_WS}': "[^\n\S]", + '{HORIZ_WS}': "[ \t\xA0\u1680\u2000-\u200A\u202F\u205F\u3000]", '{URL_CHARS}': "[-\\w/%&=#+~@$*'!?,.;:]*", '{URL_FINAL}': "[-\\w/%&=#+~@$*']", } @@ -43,9 +43,9 @@ class Markup_12y2 { constructor() { `[\`][^\`\n]*([\`]{2}[^\`\n]*)*[\`]?${'INLINE_CODE'}` `([!]${'EMBED'})?\b(https?://|sbs:){URL_CHARS}{URL_FINAL}([(]{URL_CHARS}[)]({URL_CHARS}{URL_FINAL})?)?${'LINK'}` `{BOL}[|][-][-+]*[-][|]{EOL}${'TABLE_DIVIDER'}` // `{BOL}[|][|][|]{EOL}${'TABLE_DIVIDER'}` - `{BOL}{LINE_WS}*[|]${'TABLE_START'}` - `{LINE_WS}*[|][|]?${'TABLE_CELL'}` - `{BOL}{LINE_WS}*[-]${'LIST_ITEM'}` + `{BOL}{HORIZ_WS}*[|]${'TABLE_START'}` + `{HORIZ_WS}*[|][|]?${'TABLE_CELL'}` + `{BOL}{HORIZ_WS}*[-]${'LIST_ITEM'}` () //todo: org tables separators? @@ -314,7 +314,8 @@ class Markup_12y2 { constructor() { } const ARG_REGEX = /.*?(?=])/y const WORD_REGEX = /[^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*/y - const CODE_REGEX = /(?:[^\n\S]*([-\w.+#$ ]+?)[^\n\S]*(?![^\n]))?\n?([^]*?)(?:\n?```|$)/y // ack + const CODE_REGEX = /(?:[ \t\xA0\u1680\u2000-\u200A\u202F\u205F\u3000]*([-\w.+#$ ]+?)[ \t\xA0\u1680\u2000-\u200A\u202F\u205F\u3000]*(?![^\n]))?\n?([^]*?)(?:\n?```|$)/y // ack + // FIXME: should directly use {HORIZ_WS} macro const parse=(text)=>{ let tree = {type: 'ROOT', content: [], prev: 'all_newline'} From 280eb11fd79961d87e81fe4e2c0d1ad8118ceac1 Mon Sep 17 00:00:00 2001 From: V360 Date: Wed, 4 Sep 2024 15:26:34 -0400 Subject: [PATCH 3/5] use "horiz. whitespace" macro for skipping spaces --- parse.js | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/parse.js b/parse.js index f5ebeb5..d0f3629 100644 --- a/parse.js +++ b/parse.js @@ -314,8 +314,8 @@ class Markup_12y2 { constructor() { } const ARG_REGEX = /.*?(?=])/y const WORD_REGEX = /[^\s`^()+=\[\]{}\\|"';:,.<>/?!*]*/y - const CODE_REGEX = /(?:[ \t\xA0\u1680\u2000-\u200A\u202F\u205F\u3000]*([-\w.+#$ ]+?)[ \t\xA0\u1680\u2000-\u200A\u202F\u205F\u3000]*(?![^\n]))?\n?([^]*?)(?:\n?```|$)/y // ack - // FIXME: should directly use {HORIZ_WS} macro + const CODE_REGEX = /(?: *([-\w.+#$ ]+?) *(?![^\n]))?\n?([^]*?)(?:\n?```|$)/y // ack + const SPACE_REGEX = new RegExp(`${MACROS['{HORIZ_WS}']}*`, 'y') const parse=(text)=>{ let tree = {type: 'ROOT', content: [], prev: 'all_newline'} @@ -324,11 +324,9 @@ class Markup_12y2 { constructor() { // these use REGEX, text const skip_spaces=()=>{ - let pos = REGEX.lastIndex - // FIXME: should be regex-based to catch all types of whitespace whatever they may be - while (" "===text.charAt(pos) || "\t"===text.charAt(pos)) - pos++ - REGEX.lastIndex = pos + SPACE_REGEX.lastIndex = REGEX.lastIndex + SPACE_REGEX.exec(text) + REGEX.lastIndex = SPACE_REGEX.lastIndex } const read_code=()=>{ let pos = REGEX.lastIndex From 4164b2385c4aa8051808bc5d6f4ddff8da596fb0 Mon Sep 17 00:00:00 2001 From: V360 Date: Wed, 4 Sep 2024 15:29:37 -0400 Subject: [PATCH 4/5] undo unneeded change --- parse.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse.js b/parse.js index d0f3629..9a8f6de 100644 --- a/parse.js +++ b/parse.js @@ -529,7 +529,7 @@ class Markup_12y2 { constructor() { CLOSE(true) CLOSE() // cell // TODO: HACK - if (/^[^\n\S]*[|][|]/.test(token)) { + if (/^ *[|][|]/.test(token)) { let last = current.content[current.content.length-1] last.args.div = true } From fec5108f152df890a96883d37573965bd7c5da93 Mon Sep 17 00:00:00 2001 From: V360 Date: Thu, 5 Sep 2024 15:51:18 -0400 Subject: [PATCH 5/5] unuse given "horizontal whitespace" regex --- parse.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse.js b/parse.js index 9a8f6de..480bcab 100644 --- a/parse.js +++ b/parse.js @@ -12,7 +12,7 @@ class Markup_12y2 { constructor() { '{EOL}': "(?![^\\n])", '{BOL}': "^", '{ANY}': "[^]", - '{HORIZ_WS}': "[ \t\xA0\u1680\u2000-\u200A\u202F\u205F\u3000]", + '{HORIZ_WS}': "[ \t]", '{URL_CHARS}': "[-\\w/%&=#+~@$*'!?,.;:]*", '{URL_FINAL}': "[-\\w/%&=#+~@$*']", }