From dcf931537def1ff5c99b32587e34747dad19211d Mon Sep 17 00:00:00 2001 From: Tom O'Hara Date: Sat, 24 Jun 2023 00:44:41 -0500 Subject: [PATCH] tokenizer: add support for using unimplemented nodes for array assignment (fixes #88) --- bashlex/flags.py | 1 + bashlex/parser.py | 3 +++ bashlex/tokenizer.py | 27 ++++++++++++++++++++++++--- tests/test_parser.py | 15 +++++++++++++-- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/bashlex/flags.py b/bashlex/flags.py index 8c5ad3a0..cd5ef12d 100644 --- a/bashlex/flags.py +++ b/bashlex/flags.py @@ -52,4 +52,5 @@ 'ASSNGLOBAL', # word is a global assignment to declare (declare/typeset -g) 'NOBRACE', # Don't perform brace expansion 'ASSIGNINT', # word is an integer assignment to declare + 'UNIMPLEMENTED', # word uses unimplemented feature (e.g., array) ]) diff --git a/bashlex/parser.py b/bashlex/parser.py index 6980ee75..96e769c7 100644 --- a/bashlex/parser.py +++ b/bashlex/parser.py @@ -173,6 +173,8 @@ def p_simple_command_element(p): # change the word node to an assignment if necessary if p.slice[1].ttype == tokenizer.tokentype.ASSIGNMENT_WORD: p[0][0].kind = 'assignment' + if (p.slice[1].flags & flags.word.UNIMPLEMENTED): + p[0][0].kind = 'unimplemented' def p_redirection_list(p): '''redirection_list : redirection @@ -720,6 +722,7 @@ def __init__(self, s, strictmode=True, expansionlimit=None, tokenizerargs=None, self.tok = tokenizer.tokenizer(s, parserstate=self.parserstate, strictmode=strictmode, + proceedonerror=proceedonerror, **tokenizerargs) self.redirstack = self.tok.redirstack diff --git a/bashlex/tokenizer.py b/bashlex/tokenizer.py index f71be67b..ce2d4456 100644 --- a/bashlex/tokenizer.py +++ b/bashlex/tokenizer.py @@ -199,7 +199,8 @@ def nopos(self): class tokenizer(object): def __init__(self, s, parserstate, strictmode=True, eoftoken=None, - lastreadtoken=None, tokenbeforethat=None, twotokensago=None): + lastreadtoken=None, tokenbeforethat=None, twotokensago=None, + proceedonerror=None): 
self._shell_eof_token = eoftoken self._shell_input_line = s self._added_newline = False @@ -232,6 +233,7 @@ def __init__(self, s, parserstate, strictmode=True, eoftoken=None, self._positions = [] self._strictmode = strictmode + self._proceedonerror = proceedonerror # hack: the tokenizer needs access to the stack of redirection # nodes when it reads heredocs. this instance is shared between @@ -391,7 +393,7 @@ def _readtoken(self): def _readtokenword(self, c): d = {} d['all_digit_token'] = c.isdigit() - d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = False + d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = d['unimplemented'] = False tokenword = [] @@ -467,6 +469,19 @@ def handleshellexp(): # bashlex/parse.y L4699 ARRAY_VARS + def handlecompoundassignment(): + # note: only finds matching parenthesis, so parsing can proceed + handled = False + if self._proceedonerror: + ttok = self._parse_matched_pair(None, '(', ')') + if ttok: + tokenword.append(c) + tokenword.extend(ttok) + d['compound_assignment'] = True + d['unimplemented'] = True + handled = True + return handled + def handleescapedchar(): tokenword.append(c) d['all_digit_token'] &= c.isdigit() @@ -512,6 +527,8 @@ def handleescapedchar(): elif _shellexp(c): gotonext = not handleshellexp() # bashlex/parse.y L4699 + elif c == '(' and handlecompoundassignment(): + gotonext = True if not gotonext: if _shellbreak(c): self._ungetc(c) @@ -573,7 +590,7 @@ def handleescapedchar(): tokenword.flags.add(wordflags.HASDOLLAR) if d['quoted']: tokenword.flags.add(wordflags.QUOTED) - if d['compound_assignment'] and tokenword[-1] == ')': + if d['compound_assignment'] and tokenword.value[-1] == ')': tokenword.flags.add(wordflags.COMPASSIGN) if self._is_assignment(tokenword.value, bool(self._parserstate & parserflags.COMPASSIGN)): tokenword.flags.add(wordflags.ASSIGNMENT) @@ -581,6 +598,10 @@ def handleescapedchar(): 
tokenword.flags.add(wordflags.NOSPLIT) if self._parserstate & parserflags.COMPASSIGN: tokenword.flags.add(wordflags.NOGLOB) + if d['compound_assignment']: + tokenword.flags.add(wordflags.ASSIGNARRAY) + if d['unimplemented']: + tokenword.flags.add(wordflags.UNIMPLEMENTED) # bashlex/parse.y L4865 if self._command_token_position(self._last_read_token): diff --git a/tests/test_parser.py b/tests/test_parser.py index edffe9ca..80cb76dc 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -91,8 +91,8 @@ def patternnode(s, *parts): def functionnode(s, name, body, *parts): return ast.node(kind='function', name=name, body=body, parts=list(parts), s=s) -def unimplementednode(s, *parts): - return ast.node(kind='unimplemented', parts=list(parts), s=s) +def unimplementednode(s, *parts, **kwargs): + return ast.node(kind='unimplemented', parts=list(parts), s=s, **kwargs) class test_parser(unittest.TestCase): @@ -1250,3 +1250,14 @@ def test_unimplemented(self): proceedonerror=True) with self.assertRaises(NotImplementedError): parse(s, proceedonerror=False) + + def test_array_assignment(self): + s = "num1=2 arr=(1 2 3) num2=3" + self.assertASTEquals(s, + commandnode(s, + assignmentnode('num1=2', 'num1=2'), + unimplementednode('arr=(1 2 3)', word='arr=(1 2 3)'), + assignmentnode('num2=3', 'num2=3')), + proceedonerror=True) + with self.assertRaises(errors.ParsingError): + parse(s, proceedonerror=False)