def string_quote_removal(word):
    r"""
    Perform shell quote removal on a here-document delimiter word.

    The word is scanned once, left to right, tracking quoting state
    instead of using regular expressions:

    * Outside quotes, a backslash is dropped and the character after it
      is kept literally (so ``\EOF`` yields ``EOF``).
    * Text between single quotes is copied verbatim, including any
      double quotes and backslashes it contains.
    * Double quotes are dropped and toggle double-quote mode.  Inside
      double quotes a backslash is only dropped when it precedes one of
      ``$``, a backtick, ``"``, a backslash or a newline; otherwise the
      backslash is kept.  Single quotes inside double quotes are literal.
    * A trailing lone backslash is kept, and an unterminated single
      quote consumes the rest of the word.

    Modelled on ``string_quote_removal`` in bash's ``subst.c``:
    https://github.com/bminor/bash/blob/master/subst.c

    :param word: the raw delimiter word as written after ``<<``.
    :returns: the delimiter string with quoting removed.

    >>> string_quote_removal("EOF")
    'EOF'
    >>> string_quote_removal("'EOF'")
    'EOF'
    >>> string_quote_removal('"EOF"')
    'EOF'
    >>> string_quote_removal('HERE\\"DOC\\"')
    'HERE"DOC"'
    >>> string_quote_removal('"HERE"x"DOC"')
    'HERExDOC'
    >>> string_quote_removal('\\EOF')
    'EOF'
    >>> string_quote_removal('"a"\'b\'')
    'ab'
    >>> string_quote_removal("'a\"b'")
    'a"b'

    """
    # Characters that a backslash can escape while inside double quotes.
    dquote_escapable = '$`"\\\n'

    pieces = []
    in_dquote = False
    length = len(word)
    pos = 0
    while pos < length:
        char = word[pos]
        if char == '\\':
            pos += 1
            if pos >= length:
                # A trailing backslash has nothing to escape: keep it.
                pieces.append('\\')
                break
            escaped = word[pos]
            if in_dquote and escaped not in dquote_escapable:
                # Inside double quotes the backslash stays literal unless
                # it precedes one of the escapable characters.
                pieces.append('\\')
            pieces.append(escaped)
            pos += 1
        elif char == "'" and not in_dquote:
            closing = word.find("'", pos + 1)
            if closing == -1:
                # Unterminated single quote: treat the remainder as quoted.
                pieces.append(word[pos + 1:])
                break
            # Everything between single quotes is taken verbatim.
            pieces.append(word[pos + 1:closing])
            pos = closing + 1
        elif char == '"':
            # The quote itself is removed; it only flips the state.
            in_dquote = not in_dquote
            pos += 1
        else:
            pieces.append(char)
            pos += 1
    return ''.join(pieces)