From c8058c162f4adcd1c11fa90ab151aeaf79aa0c14 Mon Sep 17 00:00:00 2001 From: WillForan Date: Sun, 29 Dec 2024 14:46:06 -0500 Subject: [PATCH 1/2] fix #99: implement commented string_quote_removal --- bashlex/heredoc.py | 18 ++++++++++++++++-- tests/test_parser.py | 17 +++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/bashlex/heredoc.py b/bashlex/heredoc.py index c6ee2e4f..c1da11a7 100644 --- a/bashlex/heredoc.py +++ b/bashlex/heredoc.py @@ -1,4 +1,5 @@ from bashlex import ast, errors +import re def gatherheredocuments(tokenizer): # if we're at the end of the input and we're not strict, allow skipping @@ -11,9 +12,22 @@ def gatherheredocuments(tokenizer): redirnode, killleading = tokenizer.redirstack.pop(0) makeheredoc(tokenizer, redirnode, 0, killleading) + +def string_quote_removal(word): + """remove single quotes for heredoc + >>> string_quote_removal("'EOF'") + 'EOF' + >>> string_quote_removal("EOF") + 'EOF' + """ + quote_match = re.search("^'(.*)'$", word) + if quote_match: + word = quote_match.group(1) + return word + + def makeheredoc(tokenizer, redirnode, lineno, killleading): - # redirword = string_quote_removal(redirectnode.word) - redirword = redirnode.output.word + redirword = string_quote_removal(redirnode.output.word) document = [] startpos = tokenizer._shell_input_line_index diff --git a/tests/test_parser.py b/tests/test_parser.py index 10809c79..f04e83c8 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -779,6 +779,23 @@ def test_heredoc_with_actual_doc(self): "delimited by end-of-file \\(wanted 'EOF'", parse, s) + def test_heredoc_singlequotes(self): + doc = 'foo\nbar\nEOF' + s = '''a <<'EOF' +%s''' % doc + + self.assertASTEquals(s, + commandnode("a <<'EOF'", + wordnode('a'), + redirectnode("<<'EOF'\n%s" % doc, None, '<<', wordnode("'EOF'"), + heredocnode(doc)) + )) + + s = "a <<'EOF'\nb" + self.assertRaisesRegex(errors.ParsingError, + "delimited by end-of-file \\(wanted 'EOF'", + parse, s) + def test_herestring(self): s = 'a <<<"b\nc"' self.assertASTEquals(s, From fff4df03615a86519236762f8e7a32457bac00c5 Mon Sep 17 00:00:00 2001 From: WillForan Date: Sat, 18 Jan 2025 20:59:33 -0500 Subject: [PATCH 2/2] string_quote_removal: rm escaped and double quotes (#99) --- bashlex/heredoc.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/bashlex/heredoc.py b/bashlex/heredoc.py index c1da11a7..d043c16a 100644 --- a/bashlex/heredoc.py +++ b/bashlex/heredoc.py @@ -14,15 +14,36 @@ def gatherheredocuments(tokenizer): def string_quote_removal(word): - """remove single quotes for heredoc + """ + Remove surrounding quotes for heredoc token. + Other quotes are removed from the token unless escaped. + + See definition in bash's source + https://github.com/bminor/bash/blob/master/subst.c#L11892 + + >>> string_quote_removal("EOF") + 'EOF' >>> string_quote_removal("'EOF'") 'EOF' - >>> string_quote_removal("EOF") + >>> string_quote_removal('"EOF"') 'EOF' + >>> string_quote_removal('HERE\\\\"DOC\\\\"') + 'HERE"DOC"' + >>> string_quote_removal('"HERE"x"DOC"') + 'HERExDOC' + """ - quote_match = re.search("^'(.*)'$", word) + # remove paired quote from start and end + quote_match = re.search("^([\"'])(.*)\\1$", word) if quote_match: - word = quote_match.group(1) + word = quote_match.group(2) + + # removing unescaped quotes. assumes matching quote pairs + # bash code would fail to parse otherwise (?) + word = re.sub("(?<=[^\\\\])[\"']","", word) + + # escaped quotes in input become literal in returned token + word = word.replace('\\"','"').replace("\\'","'") return word