From dcf931537def1ff5c99b32587e34747dad19211d Mon Sep 17 00:00:00 2001 From: Tom O'Hara Date: Sat, 24 Jun 2023 00:44:41 -0500 Subject: [PATCH] tokenizer: add support for using unimplemented nodes for array assignment (fixes #88) --- bashlex/flags.py | 1 + bashlex/parser.py | 3 +++ bashlex/tokenizer.py | 27 ++++++++++++++++++++++++--- tests/test_parser.py | 15 +++++++++++++-- 4 files changed, 41 insertions(+), 5 deletions(-) diff --git a/bashlex/flags.py b/bashlex/flags.py index 8c5ad3a0..cd5ef12d 100644 --- a/bashlex/flags.py +++ b/bashlex/flags.py @@ -52,4 +52,5 @@ 'ASSNGLOBAL', # word is a global assignment to declare (declare/typeset -g) 'NOBRACE', # Don't perform brace expansion 'ASSIGNINT', # word is an integer assignment to declare + 'UNIMPLEMENTED', # word uses unimplemented feature (e.g., array) ]) diff --git a/bashlex/parser.py b/bashlex/parser.py index 6980ee75..96e769c7 100644 --- a/bashlex/parser.py +++ b/bashlex/parser.py @@ -173,6 +173,8 @@ def p_simple_command_element(p): # change the word node to an assignment if necessary if p.slice[1].ttype == tokenizer.tokentype.ASSIGNMENT_WORD: p[0][0].kind = 'assignment' + if (p.slice[1].flags & flags.word.UNIMPLEMENTED): + p[0][0].kind = 'unimplemented' def p_redirection_list(p): '''redirection_list : redirection @@ -720,6 +722,7 @@ def __init__(self, s, strictmode=True, expansionlimit=None, tokenizerargs=None, self.tok = tokenizer.tokenizer(s, parserstate=self.parserstate, strictmode=strictmode, + proceedonerror=proceedonerror, **tokenizerargs) self.redirstack = self.tok.redirstack diff --git a/bashlex/tokenizer.py b/bashlex/tokenizer.py index f71be67b..ce2d4456 100644 --- a/bashlex/tokenizer.py +++ b/bashlex/tokenizer.py @@ -199,7 +199,8 @@ def nopos(self): class tokenizer(object): def __init__(self, s, parserstate, strictmode=True, eoftoken=None, - lastreadtoken=None, tokenbeforethat=None, twotokensago=None): + lastreadtoken=None, tokenbeforethat=None, twotokensago=None, + proceedonerror=None): 
self._shell_eof_token = eoftoken self._shell_input_line = s self._added_newline = False @@ -232,6 +233,7 @@ def __init__(self, s, parserstate, strictmode=True, eoftoken=None, self._positions = [] self._strictmode = strictmode + self._proceedonerror = proceedonerror # hack: the tokenizer needs access to the stack of redirection # nodes when it reads heredocs. this instance is shared between @@ -391,7 +393,7 @@ def _readtoken(self): def _readtokenword(self, c): d = {} d['all_digit_token'] = c.isdigit() - d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = False + d['dollar_present'] = d['quoted'] = d['pass_next_character'] = d['compound_assignment'] = d['unimplemented'] = False tokenword = [] @@ -467,6 +469,19 @@ def handleshellexp(): # bashlex/parse.y L4699 ARRAY_VARS + def handlecompoundassignment(): + # note: only finds matching parenthesis, so parsing can proceed + handled = False + if self._proceedonerror: + ttok = self._parse_matched_pair(None, '(', ')') + if ttok: + tokenword.append(c) + tokenword.extend(ttok) + d['compound_assignment'] = True + d['unimplemented'] = True + handled = True + return handled + def handleescapedchar(): tokenword.append(c) d['all_digit_token'] &= c.isdigit() @@ -512,6 +527,8 @@ def handleescapedchar(): elif _shellexp(c): gotonext = not handleshellexp() # bashlex/parse.y L4699 + elif c == '(' and handlecompoundassignment(): + gotonext = True if not gotonext: if _shellbreak(c): self._ungetc(c) @@ -573,7 +590,7 @@ def handleescapedchar(): tokenword.flags.add(wordflags.HASDOLLAR) if d['quoted']: tokenword.flags.add(wordflags.QUOTED) - if d['compound_assignment'] and tokenword[-1] == ')': + if d['compound_assignment'] and tokenword.value[-1] == ')': tokenword.flags.add(wordflags.COMPASSIGN) if self._is_assignment(tokenword.value, bool(self._parserstate & parserflags.COMPASSIGN)): tokenword.flags.add(wordflags.ASSIGNMENT) @@ -581,6 +598,10 @@ def handleescapedchar(): 
tokenword.flags.add(wordflags.NOSPLIT) if self._parserstate & parserflags.COMPASSIGN: tokenword.flags.add(wordflags.NOGLOB) + if d['compound_assignment']: + tokenword.flags.add(wordflags.ASSIGNARRAY) + if d['unimplemented']: + tokenword.flags.add(wordflags.UNIMPLEMENTED) # bashlex/parse.y L4865 if self._command_token_position(self._last_read_token): diff --git a/tests/test_parser.py b/tests/test_parser.py index edffe9ca..80cb76dc 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -91,8 +91,8 @@ def patternnode(s, *parts): def functionnode(s, name, body, *parts): return ast.node(kind='function', name=name, body=body, parts=list(parts), s=s) -def unimplementednode(s, *parts): - return ast.node(kind='unimplemented', parts=list(parts), s=s) +def unimplementednode(s, *parts, **kwargs): + return ast.node(kind='unimplemented', parts=list(parts), s=s, **kwargs) class test_parser(unittest.TestCase): @@ -1250,3 +1250,14 @@ def test_unimplemented(self): proceedonerror=True) with self.assertRaises(NotImplementedError): parse(s, proceedonerror=False) + + def test_array_assignment(self): + s = "num1=2 arr=(1 2 3) num2=3" + self.assertASTEquals(s, + commandnode(s, + assignmentnode('num1=2', 'num1=2'), + unimplementednode('arr=(1 2 3)', word='arr=(1 2 3)'), + assignmentnode('num2=3', 'num2=3')), + proceedonerror=True) + with self.assertRaises(errors.ParsingError): + parse(s, proceedonerror=False)