From a591955773ca16b15930b5c4e955a058f88a7b5e Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron"
Date: Fri, 31 Jul 2020 18:42:37 +0200 Subject: [PATCH 01/11] Rename and add CheckLineTerminator field to toggle the condition. --- jsparagus/actions.py | 28 ++++++++++++++++++---------- jsparagus/emit/python.py | 4 ++-- jsparagus/emit/rust.py | 5 +++-- jsparagus/lr0.py | 4 ++-- 4 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/jsparagus/actions.py b/jsparagus/actions.py index 29d810edb..ac10b7425 100644 --- a/jsparagus/actions.py +++ b/jsparagus/actions.py @@ -96,7 +96,7 @@ def __init__(self) -> None: def is_inconsistent(self) -> bool: """Returns True if this action is inconsistent. An action can be inconsistent if the parameters it is given cannot be evaluated given - its current location in the parse table. Such as CheckNotOnNewLine. + its current location in the parse table. Such as CheckLineTerminator. """ return False @@ -374,18 +374,26 @@ def shifted_action(self, shifted_term: Element) -> ShiftedAction: return not self.accept -class CheckNotOnNewLine(Action): +class CheckLineTerminator(Action): """Check whether the terminal at the given stack offset is on a new line or - not. If not this would produce an Error, otherwise this rule would be - shifted.""" - __slots__ = ['offset'] + not. If the condition is true, then the edge is followed. """ + __slots__ = ['offset', 'is_on_new_line'] + # Offset of the token which is being checked. + # - If this number is zero, then # this represents the next token. + # - If this number is -1, this represents the last shifted token. + # - If this number is -2, this represents the second to last shifted token. offset: int - def __init__(self, offset: int = 0) -> None: + # Check whether the token at the offset is (= True), or is not (= False) on + # a new line compared to the previous token. + is_on_new_line: bool + + def __init__(self, offset: int = 0, is_on_new_line: bool = False) -> None: # assert offset >= -1 and "Smaller offsets are not supported on all backends." super().__init__() self.offset = offset + self.is_on_new_line = is_on_new_line def is_inconsistent(self) -> bool: # We can only look at stacked terminals. 
Having an offset of 0 implies @@ -397,11 +405,11 @@ def is_inconsistent(self) -> bool: def is_condition(self) -> bool: return True - def condition(self) -> CheckNotOnNewLine: + def condition(self) -> CheckLineTerminator: return self def check_same_variable(self, other: Action) -> bool: - return isinstance(other, CheckNotOnNewLine) and self.offset == other.offset + return isinstance(other, CheckLineTerminator) and self.offset == other.offset def check_different_values(self, other: Action) -> bool: return False @@ -409,10 +417,10 @@ def check_different_values(self, other: Action) -> bool: def shifted_action(self, shifted_term: Element) -> ShiftedAction: if isinstance(shifted_term, Nt): return True - return CheckNotOnNewLine(self.offset - 1) + return CheckLineTerminator(self.offset - 1, self.is_on_new_line) def __str__(self) -> str: - return "CheckNotOnNewLine({})".format(self.offset) + return "CheckLineTerminator({}, {})".format(self.offset, self.is_on_new_line) class FilterStates(Action): diff --git a/jsparagus/emit/python.py b/jsparagus/emit/python.py index c974d371c..d3ff7d770 100644 --- a/jsparagus/emit/python.py +++ b/jsparagus/emit/python.py @@ -6,7 +6,7 @@ import typing from ..grammar import ErrorSymbol, Nt, Some -from ..actions import (Accept, Action, CheckNotOnNewLine, FilterFlag, FilterStates, FunCall, +from ..actions import (Accept, Action, CheckLineTerminator, FilterFlag, FilterStates, FunCall, Lookahead, OutputExpr, PopFlag, PushFlag, Reduce, Replay, Seq, Unwind) from ..runtime import ErrorToken, ErrorTokenClass from ..ordered import OrderedSet @@ -73,7 +73,7 @@ def write_action(act: Action, indent: str = "") -> typing.Tuple[str, bool]: return indent, False if isinstance(act, Lookahead): raise ValueError("Unexpected Lookahead action") - if isinstance(act, CheckNotOnNewLine): + if isinstance(act, CheckLineTerminator): out.write("{}if not parser.check_not_on_new_line(lexer, {}):\n".format(indent, -act.offset)) out.write("{} return\n".format(indent)) return indent, True diff --git a/jsparagus/emit/rust.py b/jsparagus/emit/rust.py index ca8382954..bd60beb94 100644 --- a/jsparagus/emit/rust.py +++ b/jsparagus/emit/rust.py @@ -12,7 +12,7 @@ from ..ordered import OrderedSet from ..grammar import (Some, Nt, InitNt, End, ErrorSymbol) -from ..actions import (Accept, Action, Replay, Unwind, Reduce, CheckNotOnNewLine, FilterStates, +from ..actions import (Accept, Action, Replay, Unwind, Reduce, CheckLineTerminator, FilterStates, PushFlag, PopFlag, FunCall, Seq) from .. import types @@ -223,7 +223,7 @@ def write_condition(self, state, first_act): # states. Thus we use the first action to produce the match statement. assert isinstance(first_act, Action) assert first_act.is_condition() - if isinstance(first_act, CheckNotOnNewLine): + if isinstance(first_act, CheckLineTerminator): # TODO: At the moment this Action is implemented as a single # operation with a single destination. However, we should implement # it in the future as 2 branches, one which is verifying the lack @@ -234,6 +234,7 @@ def write_condition(self, state, first_act): act, dest = next(state.edges()) assert len(self.replay_args) == 0 assert -act.offset > 0 + assert act.is_on_new_line == False self.write("// {}", str(act)) self.write("if !parser.check_not_on_new_line({})? 
{{", -act.offset) with indent(self): diff --git a/jsparagus/lr0.py b/jsparagus/lr0.py index 5e4364df3..b0f1a7215 100644 --- a/jsparagus/lr0.py +++ b/jsparagus/lr0.py @@ -11,7 +11,7 @@ from dataclasses import dataclass import typing -from .actions import (Accept, Action, CheckNotOnNewLine, FunCall, Lookahead, +from .actions import (Accept, Action, CheckLineTerminator, FunCall, Lookahead, OutputExpr, Unwind, Reduce, Seq) from .ordered import OrderedFrozenSet from .grammar import (CallMethod, Element, End, ErrorSymbol, Grammar, @@ -336,7 +336,7 @@ def item_transitions( # Check whether the following terminal is on a new line. If # not, this would produce a syntax error. The argument is the # terminal offset. - term = CheckNotOnNewLine() + term = CheckLineTerminator() elif isinstance(term, CallMethod): funcalls: typing.List[Action] = [] pop = sum(1 for e in prod.rhs[:lr_item.offset] if on_stack(self.grammar.grammar, e)) From f5c5bb89388d308e23b2e252e5ea472337dad77b Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Mon, 3 Aug 2020 19:12:13 +0200 Subject: [PATCH 02/11] Add a phase to expand JS ASI rules into ParseTable states. --- jsparagus/aps.py | 1 + jsparagus/grammar.py | 2 +- jsparagus/lr0.py | 2 +- jsparagus/parse_table.py | 130 ++++++++++++++++++++++++++++++++++++++- 4 files changed, 130 insertions(+), 5 deletions(-) diff --git a/jsparagus/aps.py b/jsparagus/aps.py index 18bb7f84a..5e1a2a3e7 100644 --- a/jsparagus/aps.py +++ b/jsparagus/aps.py @@ -217,6 +217,7 @@ def shift_next(self, pt: ParseTable) -> typing.Iterator[APS]: last_edge = sh[-1] state = pt.states[last_edge.src] state_match_shift_end = self.state == self.shift[-1].src + term: Term if self.replay == []: assert state_match_shift_end for term, to in state.shifted_edges(): diff --git a/jsparagus/grammar.py b/jsparagus/grammar.py index bcaf5a02a..94db558b0 100644 --- a/jsparagus/grammar.py +++ b/jsparagus/grammar.py @@ -1145,7 +1145,7 @@ class End: @dataclass(frozen=True) class ErrorSymbol: """Special grammar symbol that can be consumed to handle a syntax error.""" - error_code: int + error_code: str Element = typing.Union[ diff --git a/jsparagus/lr0.py b/jsparagus/lr0.py index b0f1a7215..8af81bd41 100644 --- a/jsparagus/lr0.py +++ b/jsparagus/lr0.py @@ -140,7 +140,7 @@ class LRItem: # A Term is the label on an edge from one state to another. It's normally a # terminal, nonterminal, or epsilon action. A state can also have a special # catchall edge, labeled with an ErrorSymbol. -ShiftedTerm = typing.Union[str, Nt, ErrorSymbol] +ShiftedTerm = typing.Union[str, End, Nt, ErrorSymbol] Term = typing.Union[ShiftedTerm, Action] diff --git a/jsparagus/parse_table.py b/jsparagus/parse_table.py index ebf548c86..ea7638de7 100644 --- a/jsparagus/parse_table.py +++ b/jsparagus/parse_table.py @@ -11,7 +11,8 @@ from . import types from .utils import consume, keep_until, split, default_id_dict, default_fwd_dict from .ordered import OrderedSet, OrderedFrozenSet -from .actions import Action, Replay, Reduce, FilterStates, Seq +from .actions import (Action, Replay, Reduce, Unwind, FilterStates, Seq, + CheckLineTerminator) from .grammar import End, ErrorSymbol, InitNt, Nt from .rewrites import CanonicalGrammar from .lr0 import LR0Generator, Term @@ -56,7 +57,7 @@ class StateAndTransitions: arguments: int # Outgoing edges taken when shifting terminals. - terminals: typing.Dict[str, StateId] + terminals: typing.Dict[typing.Union[str, End], StateId] # Outgoing edges taken when shifting nonterminals after reducing. 
nonterminals: typing.Dict[Nt, StateId] @@ -153,7 +154,7 @@ def is_inconsistent(self) -> bool: return False def shifted_edges(self) -> typing.Iterator[ - typing.Tuple[typing.Union[str, Nt, ErrorSymbol], StateId] + typing.Tuple[typing.Union[str, End, Nt, ErrorSymbol], StateId] ]: k: Term s: StateId @@ -364,6 +365,13 @@ def __init__( self.exec_modes = grammar.grammar.exec_modes self.assume_inconsistent = True self.create_lr0_table(grammar, verbose, progress) + # Automatic Semicolon Insertion (ASI) is a process by which the + # JavaScript grammar is implicitly disambiguated. This phase would make + # this grammar transformation explicit. + self.expand_javascript_asi(verbose, progress) + # LR0 is inconsistent, which implies that we need a non-deterministic + # evaluator. This phase solves ambiguities of the grammar by adding + # more lookahead and context. self.fix_inconsistent_table(verbose, progress) # TODO: Optimize chains of actions into sequences. # Optimize by removing unused states. @@ -915,6 +923,122 @@ def visit(aps: APS) -> bool: self.aps_visitor(APS.start(state), visit) return record + def expand_javascript_asi(self, verbose: bool, progress: bool) -> None: + """The frontend of the JavaScript grammar adds ErrorSymbols to handle Automatic + Semicolon Insertion (ASI). As described in the ECMAScript + Specification [1], these locations are used as a fallback mechanism for + tokens which would not be allowed by the grammar otherwise. + + Implementing these rules implies that we have to look at all locations + which have an ErrorSymbol dedicated to ASI, and add the missing + transitions, such that the ErrorSymbol can be converted to an ordinary + nonterminal of the grammar. + + Note, this is an optimization dedicated to removing the handling of a + replay-list (variable list of lookahead) as well as a fallback + mechanism for the runtime of the parser. + + [1] https://tc39.es/ecma262/#sec-automatic-semicolon-insertion + + """ + if verbose or progress: + print("Expand JavaScript Automatic Semicolon Insertion") + + # Collect all states which have an ErrorSymbol. + todo = [] + for s in self.states: + if len(s.errors) > 0: + todo.append(s) + if todo == []: + return + + # Get the dead-end state, as the destination of the reduce edges. + dead_end = self.get_state(OrderedFrozenSet()) + + # Add Reduce ErrorSymbol("asi") + asi_nt = Nt(str(ErrorSymbol("asi"))) + self.nonterminals.append(asi_nt) + reduce_asi = self.get_state( + OrderedFrozenSet(['ErrorSymbol("asi") ::= ·', + 'ErrorSymbol("asi") ::= [LineTerminator here] ·'])) + self.add_edge(reduce_asi, Reduce(Unwind(asi_nt, 0, 1)), dead_end.index) + + # Add CheckLineTerminator -> Reduce ErrorSymbol("asi") + newline_asi_term = CheckLineTerminator(-1, True) + newline_asi = self.get_state( + OrderedFrozenSet(['ErrorSymbol("asi") ::= [LineTerminator here] ·']), + OrderedFrozenSet([newline_asi_term])) + self.add_edge(newline_asi, newline_asi_term, reduce_asi.index) + + maybe_unreachable_set: OrderedSet[StateId] = OrderedSet() + no_LineTerminator_here = CheckLineTerminator(0, False) + + def contains_no_LineTerminator_here(aps: APS) -> bool: + for edge in aps.history: + if edge.term == no_LineTerminator_here: + return True + return False + + # Replace error symbol edges of the parse table by the semantics + # mentioned in [1] by adding the equivalent parse table rules. The + # error symbols are replaced by non-terminals which are reduced by the + # `reduce_asi` and `reduce_dw_asi` states. 
+ # + # [1] https://tc39.es/ecma262/#sec-automatic-semicolon-insertion + def visit_error_symbols() -> typing.Iterator[None]: + for s in todo: + yield # progress bar. + assert len(s.errors) == 1 + + # 11.9.1.1 Capture the list of offending tokens. An offending + # token is a token which is not allowed by any production of + # the grammar. + # + # 11.9.1.2 (End is implicitly considered as a terminal) + aps_lanes = self.lookahead_lanes(s.index) + assert all(len(aps.lookahead) >= 1 for aps in aps_lanes) + accepted_tokens = set(aps.lookahead[0] for aps in aps_lanes) + offending_tokens = [t for t in self.terminals if t not in accepted_tokens] + + # 11.9.1.3 A restricted token is a token from a restricted + # production, i.e. which has a `[no LineTerminator here]` + # before the token. + restricted_lanes = [aps for aps in aps_lanes if contains_no_LineTerminator_here(aps)] + restricted_tokens = [aps.lookahead[0] for aps in restricted_lanes] + + # Get the index of the state to go to after the nonterminal + # semicolon. + error, dest = next(iter(s.errors.items())) + self.remove_edge(s, error, maybe_unreachable_set) + self.add_edge(s, asi_nt, dest) + + if "}" in offending_tokens: + self.add_edge(s, "}", reduce_asi.index) + offending_tokens.remove("}") + + if error == ErrorSymbol("asi"): + offending_target = newline_asi + elif error == ErrorSymbol("do_while_asi"): + # `do{}while(false) foo()` is valid JavaScript syntax, + # where a semicolon is automatically inserted between the + # ending parenthesis of the while statement and the foo + # identifier. + offending_target = reduce_asi + else: + raise ValueError("Unexpected ErrorSymbol code") + + for token in offending_tokens: + self.add_edge(s, token, offending_target.index) + for term in restricted_tokens: + if isinstance(term, (str, End)): + self.add_edge(s, term, newline_asi.index) + + consume(visit_error_symbols(), progress) + + if verbose: + print("Expand JavaScript Automatic Semicolon Insertion") + self.debug_dump() + def fix_with_context(self, s: StateId, aps_lanes: typing.List[APS]) -> None: raise ValueError("fix_with_context: Not Implemented") # # This strategy is about using context information. By using chains of From c1711a810813794254980d6c4d49579054e70f11 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron"
Date: Mon, 3 Aug 2020 19:32:44 +0200 Subject: [PATCH 03/11] Extract restore_edges out of fix_with_lookahead. --- jsparagus/parse_table.py | 137 ++++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 67 deletions(-) diff --git a/jsparagus/parse_table.py b/jsparagus/parse_table.py index ea7638de7..82790a7ab 100644 --- a/jsparagus/parse_table.py +++ b/jsparagus/parse_table.py @@ -595,6 +595,75 @@ def clear_edges( for dest in old_dest: self.assert_state_invariants(dest) + def restore_edges( + self, + state: StateAndTransitions, + shift_map: typing.DefaultDict[ + Term, + typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]] + ], + maybe_unreachable_set: OrderedSet[StateId], + debug_depth: str = "" + ) -> None: + """Restore the new state machine based on a given state to use as a base and + the shift_map corresponding to edges.""" + + # print("{}starting with {}\n".format(depth, state)) + edges = {} + for term, actions_list in shift_map.items(): + # print("{}term: {}, lists: {}\n".format(depth, repr(term), repr(actions_list))) + # Collect all the states reachable after shifting the term. + # Compute the unique name, based on the locations and actions + # which are delayed. 
+ locations: OrderedSet[str] = OrderedSet() + delayed: OrderedSet[DelayedAction] = OrderedSet() + new_shift_map: typing.DefaultDict[ + Term, + typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]] + ] + new_shift_map = collections.defaultdict(lambda: []) + recurse = False + if not self.term_is_shifted(term): + # There is no more target after a reduce action. + actions_list = [] + for target, actions in actions_list: + assert isinstance(target, StateAndTransitions) + locations |= target.locations + delayed |= target.delayed_actions + if actions != []: + # Pull edges, with delayed actions. + edge = actions[0] + assert isinstance(edge, Edge) + for action in actions: + action_term = action.term + assert isinstance(action_term, Action) + delayed.add(action_term) + edge_term = edge.term + assert edge_term is not None + new_shift_map[edge_term].append((target, actions[1:])) + recurse = True + else: + # Pull edges, as a copy of existing edges. + for next_term, next_dest_id in target.edges(): + next_dest = self.states[next_dest_id] + new_shift_map[next_term].append((next_dest, [])) + + is_new, new_target = self.new_state( + OrderedFrozenSet(locations), OrderedFrozenSet(delayed)) + edges[term] = new_target.index + if self.debug_info: + print("{}is_new = {}, index = {}".format(debug_depth, is_new, new_target.index)) + print("{}Add: {} -- {} --> {}".format(debug_depth, state.index, str(term), new_target.index)) + print("{}continue: (is_new: {}) or (recurse: {})".format(debug_depth, is_new, recurse)) + if is_new or recurse: + self.restore_edges(new_target, new_shift_map, maybe_unreachable_set, debug_depth + " ") + + self.clear_edges(state, maybe_unreachable_set) + for term, target_id in edges.items(): + self.add_edge(state, term, target_id) + if self.debug_info: + print("{}replaced by {}\n".format(debug_depth, state)) + def assert_table_invariants(self) -> None: for s in self.states: if s is not None: @@ -1305,74 +1374,8 @@ def fix_with_lookahead(self, s: StateId, aps_lanes: typing.List[APS]) -> None: target = self.states[target_id] shift_map[term].append((target, new_actions)) - # Restore the new state machine based on a given state to use as a base - # and the shift_map corresponding to edges. - def restore_edges( - state: StateAndTransitions, - shift_map: typing.DefaultDict[ - Term, - typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]] - ], - depth: str - ) -> None: - # print("{}starting with {}\n".format(depth, state)) - edges = {} - for term, actions_list in shift_map.items(): - # print("{}term: {}, lists: {}\n".format(depth, repr(term), repr(actions_list))) - # Collect all the states reachable after shifting the term. - # Compute the unique name, based on the locations and actions - # which are delayed. - locations: OrderedSet[str] = OrderedSet() - delayed: OrderedSet[DelayedAction] = OrderedSet() - new_shift_map: typing.DefaultDict[ - Term, - typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]] - ] - new_shift_map = collections.defaultdict(lambda: []) - recurse = False - if not self.term_is_shifted(term): - # There is no more target after a reduce action. - actions_list = [] - for target, actions in actions_list: - assert isinstance(target, StateAndTransitions) - locations |= target.locations - delayed |= target.delayed_actions - if actions != []: - # Pull edges, with delayed actions. 
- edge = actions[0] - assert isinstance(edge, Edge) - for action in actions: - action_term = action.term - assert isinstance(action_term, Action) - delayed.add(action_term) - edge_term = edge.term - assert edge_term is not None - new_shift_map[edge_term].append((target, actions[1:])) - recurse = True - else: - # Pull edges, as a copy of existing edges. - for next_term, next_dest_id in target.edges(): - next_dest = self.states[next_dest_id] - new_shift_map[next_term].append((next_dest, [])) - - is_new, new_target = self.new_state( - OrderedFrozenSet(locations), OrderedFrozenSet(delayed)) - edges[term] = new_target.index - if self.debug_info: - print("{}is_new = {}, index = {}".format(depth, is_new, new_target.index)) - print("{}Add: {} -- {} --> {}".format(depth, state.index, str(term), new_target.index)) - print("{}continue: (is_new: {}) or (recurse: {})".format(depth, is_new, recurse)) - if is_new or recurse: - restore_edges(new_target, new_shift_map, depth + " ") - - self.clear_edges(state, maybe_unreachable_set) - for term, target_id in edges.items(): - self.add_edge(state, term, target_id) - if self.debug_info: - print("{}replaced by {}\n".format(depth, state)) - state = self.states[s] - restore_edges(state, shift_map, "") + self.restore_edges(state, shift_map, maybe_unreachable_set) self.remove_unreachable_states(maybe_unreachable_set) def fix_inconsistent_state(self, s: StateId, verbose: bool) -> bool: From 2878d88ebb510183ef28af944f1285bbbbcbb43b Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron"
Date: Thu, 6 Aug 2020 16:54:14 +0200 Subject: [PATCH 04/11] Explicit tokens used for reducing error symbols. This sharing of states causes the resolution of the inconsistencies to take a long time, and also causes issues in lower_reduce_action. This change removes the sharing and makes each token explicit. 
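
To make the shape of the change concrete, here is a minimal standalone
sketch (hypothetical names, not the jsparagus API) of the keying scheme:
instead of one reduce state shared by every token, each token gets its own
state, so later inconsistency fixes never have to split a state shared
between unrelated lookaheads.

    from typing import Dict, FrozenSet

    def asi_reduce_states(terminals) -> Dict[str, FrozenSet[str]]:
        # One dedicated state per terminal, named after the productions it
        # stands for, instead of a single state shared by all tokens.
        states = {}
        for t in terminals:
            states[t] = frozenset([
                'ErrorSymbol("asi") ::= {} ·'.format(t),
                'ErrorSymbol("asi") ::= [LineTerminator here] {} ·'.format(t),
            ])
        return states

    states = asi_reduce_states([";", "}"])
    assert states[";"] != states["}"]  # no sharing between tokens
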
--- jsparagus/parse_table.py | 42 +++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/jsparagus/parse_table.py b/jsparagus/parse_table.py index 82790a7ab..56c4cdde5 100644 --- a/jsparagus/parse_table.py +++ b/jsparagus/parse_table.py @@ -1027,17 +1027,21 @@ def expand_javascript_asi(self, verbose: bool, progress: bool) -> None: # Add Reduce ErrorSymbol("asi") asi_nt = Nt(str(ErrorSymbol("asi"))) self.nonterminals.append(asi_nt) - reduce_asi = self.get_state( - OrderedFrozenSet(['ErrorSymbol("asi") ::= ·', - 'ErrorSymbol("asi") ::= [LineTerminator here] ·'])) - self.add_edge(reduce_asi, Reduce(Unwind(asi_nt, 0, 1)), dead_end.index) + reduce_asi: typing.Dict[ShiftedTerm, StateAndTransitions] = {} + for t in self.terminals: + reduce_asi[t] = self.get_state( + OrderedFrozenSet(['ErrorSymbol("asi") ::= {} ·'.format(str(t)), + 'ErrorSymbol("asi") ::= [LineTerminator here] {} ·'.format(str(t))])) + self.add_edge(reduce_asi[t], Reduce(Unwind(asi_nt, 0, 1)), dead_end.index) # Add CheckLineTerminator -> Reduce ErrorSymbol("asi") newline_asi_term = CheckLineTerminator(-1, True) - newline_asi = self.get_state( - OrderedFrozenSet(['ErrorSymbol("asi") ::= [LineTerminator here] ·']), - OrderedFrozenSet([newline_asi_term])) - self.add_edge(newline_asi, newline_asi_term, reduce_asi.index) + newline_asi: typing.Dict[ShiftedTerm, StateAndTransitions] = {} + for t in self.terminals: + newline_asi[t] = self.get_state( + OrderedFrozenSet(['ErrorSymbol("asi") ::= [LineTerminator here] {} ·'.format(str(t))]), + OrderedFrozenSet([newline_asi_term])) + self.add_edge(newline_asi[t], newline_asi_term, reduce_asi[t].index) maybe_unreachable_set: OrderedSet[StateId] = OrderedSet() no_LineTerminator_here = CheckLineTerminator(0, False) @@ -1058,16 +1062,28 @@ def visit_error_symbols() -> typing.Iterator[None]: for s in todo: yield # progress bar. assert len(s.errors) == 1 + error, error_dest = next(iter(s.errors.items())) # 11.9.1.1 Capture the list of offending tokens. An offending # token is a token which is not allowed by any production of # the grammar. # # 11.9.1.2 (End is implicitly considered as a terminal) + # + # Note: Normally offending tokens are any tokens which do not + # satisfy any production of the grammar, however doing so by + # using the list of all possible tokens causes an ambiguity in + # the parse table. Thus we restrict this to the list of tokens + # which might be accepted after the error symbol. aps_lanes = self.lookahead_lanes(s.index) + aps_lanes_on_error = self.lookahead_lanes(error_dest) assert all(len(aps.lookahead) >= 1 for aps in aps_lanes) accepted_tokens = set(aps.lookahead[0] for aps in aps_lanes) - offending_tokens = [t for t in self.terminals if t not in accepted_tokens] + accepted_tokens_on_error = set(aps.lookahead[0] for aps in aps_lanes_on_error) + offending_tokens = [ + t for t in accepted_tokens_on_error + if t not in accepted_tokens and t in self.terminals + ] # 11.9.1.3 A restricted token is a token from a restricted # production, i.e. 
which has a `[no LineTerminator here]` @@ -1082,7 +1098,7 @@ def visit_error_symbols() -> typing.Iterator[None]: self.add_edge(s, asi_nt, dest) if "}" in offending_tokens: - self.add_edge(s, "}", reduce_asi.index) + self.add_edge(s, "}", reduce_asi["}"].index) offending_tokens.remove("}") if error == ErrorSymbol("asi"): @@ -1097,15 +1113,15 @@ def visit_error_symbols() -> typing.Iterator[None]: raise ValueError("Unexpected ErrorSymbol code") for token in offending_tokens: - self.add_edge(s, token, offending_target.index) + self.add_edge(s, token, offending_target[token].index) for term in restricted_tokens: if isinstance(term, (str, End)): - self.add_edge(s, term, newline_asi.index) + self.add_edge(s, term, newline_asi[term].index) consume(visit_error_symbols(), progress) if verbose: - print("Expand JavaScript Automatic Semicolon Insertion") + print("Expand JavaScript Automatic Semicolon Insertion result:") self.debug_dump() def fix_with_context(self, s: StateId, aps_lanes: typing.List[APS]) -> None: From 8042d1ff3ebca80623ce025bf559395029074fa1 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron"
Date: Thu, 6 Aug 2020 18:16:23 +0200 Subject: [PATCH 05/11] Add a new way to fix inconsistencies by using existing conditionals. When we have conditionals which are checking whether the previous token is on a new line or not, such as `[no LineTerminator here]`, it is safe to wrap any non-guarded edge with both the condition and its negated version. This way, the resolution of lookahead, reduce-reduce, and shift-reduce conflicts does not have to push the check for new lines further than necessary. --- jsparagus/actions.py | 28 +++++++++++++++++- jsparagus/parse_table.py | 64 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/jsparagus/actions.py b/jsparagus/actions.py index ac10b7425..456754b39 100644 --- a/jsparagus/actions.py +++ b/jsparagus/actions.py @@ -108,6 +108,18 @@ def condition(self) -> Action: "Return the conditional action." raise TypeError("Action.condition not implemented") + def can_negate(self) -> bool: + "Whether the current condition (action) implements the negate function." + assert self.is_condition() + return False + + def negate(self, covered: typing.List[Action]) -> typing.List[Action]: + """Given a list of conditions, return the condition which checks the same + variable but covers all the values not covered by the rest of the + conditions, adding it to the list which is returned.""" + assert self.can_negate() + raise TypeError("Action.negate not implemented") + def check_same_variable(self, other: Action) -> bool: "Return whether both conditionals are checking the same variable." assert self.is_condition() @@ -392,6 +404,7 @@ class CheckLineTerminator(Action): def __init__(self, offset: int = 0, is_on_new_line: bool = False) -> None: # assert offset >= -1 and "Smaller offsets are not supported on all backends." super().__init__() + assert offset >= -1 self.offset = offset self.is_on_new_line = is_on_new_line @@ -408,11 +421,24 @@ def is_condition(self) -> bool: return True def condition(self) -> CheckLineTerminator: return self + def can_negate(self) -> bool: + "Unordered condition, which may accept or reject reaching the next state." 
+ return True + + def negate(self, covered: typing.List[Action]) -> typing.List[Action]: + assert len(covered) >= 1 and len(covered) <= 2 + if len(covered) == 2: + return covered + assert covered[0] == self + negated = CheckLineTerminator(self.offset, not self.is_on_new_line) + return [self, negated] + def check_same_variable(self, other: Action) -> bool: return isinstance(other, CheckLineTerminator) and self.offset == other.offset def check_different_values(self, other: Action) -> bool: - return False + assert isinstance(other, CheckLineTerminator) + return self.is_on_new_line != other.is_on_new_line def shifted_action(self, shifted_term: Element) -> ShiftedAction: if isinstance(shifted_term, Nt): diff --git a/jsparagus/parse_table.py b/jsparagus/parse_table.py index 56c4cdde5..0d48fd8c3 100644 --- a/jsparagus/parse_table.py +++ b/jsparagus/parse_table.py @@ -1335,6 +1335,56 @@ def fix_with_context(self, s: StateId, aps_lanes: typing.List[APS]) -> None: # self.remove_unreachable_states(maybe_unreachable_set) # pass + def try_fix_with_conditions(self, s: StateId) -> bool: + """When dealing with consistent conditions, we can simply move all the + inconsistencies under the condition and its negated counter-part. This + is equivalent to changing the order in which variables are compared, + given conditions which are not dependent on each other. """ + + state = self.states[s] + assert state.is_inconsistent() + conditions: typing.List[Action] = [] + for act, dest in state.epsilon: + if act.is_condition() and not act.is_inconsistent() and act.can_negate(): + if conditions == []: + conditions.append(act) + elif conditions[0].check_same_variable(act): + conditions.append(act) + + # If we have not found any consistent condition, then fall back on + # other ways of solving inconsistencies. + if conditions == []: + return False + + # Bail out if the set of extracted conditions would remain inconsistent + # even after try_fix_with_conditions. + pairs = itertools.combinations(conditions, 2) + if any(not k1.check_different_values(k2) for k1, k2 in pairs): + return False + + # If the list of conditions does not yet cover the space of possible + # values, add the missing actions, with which we should wrap all the + # cases which have no conditionals. + conditions = conditions[0].negate(conditions) + + shift_map: typing.DefaultDict[ + Term, + typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]] + ] + shift_map = collections.defaultdict(lambda: []) + edges = list(iter(state.edges())) + for term, dest in edges: + if term in conditions: + shift_map[term].append((self.states[dest], [])) + else: + for cond in conditions: + shift_map[cond].append((self.states[dest], [Edge(s, term)])) + + maybe_unreachable_set: OrderedSet[StateId] = OrderedSet() + self.restore_edges(state, shift_map, maybe_unreachable_set) + self.remove_unreachable_states(maybe_unreachable_set) + return True + def fix_with_lookahead(self, s: StateId, aps_lanes: typing.List[APS]) -> None: # Find the list of terminals following each action (even reduce # actions). 
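
The wrapping performed by try_fix_with_conditions can be sketched
independently of the parse table (simplified types and a hypothetical
helper; the real code goes through restore_edges): every edge that is not
guarded by one of the conditions is duplicated under each condition, with
the original term delayed behind it.

    def wrap_unguarded_edges(edges, conditions):
        # edges: list of (term, destination); conditions: consistent,
        # negatable conditions covering the whole value space.
        shift_map = {}
        for term, dest in edges:
            if term in conditions:
                # Already guarded: keep the edge as-is.
                shift_map.setdefault(term, []).append((dest, []))
            else:
                # Unguarded: replicate it under every condition, delaying
                # the original term until the condition has been checked.
                for cond in conditions:
                    shift_map.setdefault(cond, []).append((dest, [term]))
        return shift_map

    edges = [("is_on_new_line", 1), ("Reduce(Stmt)", 2)]
    print(wrap_unguarded_edges(edges, ["is_on_new_line", "not_on_new_line"]))
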
@@ -1416,11 +1466,24 @@ def fix_inconsistent_state(self, s: StateId, verbose: bool) -> bool: return False all_reduce = all(a.update_stack() for a, _ in state.epsilon) + any_conditional = any(a.is_condition() and a.can_negate() for a, _ in state.epsilon) any_shift = (len(state.terminals) + len(state.nonterminals) + len(state.errors)) > 0 + try_with_conditionals = any_conditional try_with_context = all_reduce and not any_shift try_with_lookahead = not try_with_context # if verbose: # print(aps_lanes_str(aps_lanes, "fix_inconsistent_state:", "\taps")) + if try_with_conditionals: + if verbose: + print("\tFix with conditionals") + fixed = self.try_fix_with_conditions(s) + if fixed: + try_with_context = False + try_with_lookahead = False + elif verbose and try_with_context: + print("\tFallback on fixing with context.") + elif verbose and try_with_lookahead: + print("\tFallback on fixing with lookahead.") if try_with_context: if verbose: print("\tFix with context.") @@ -1436,6 +1499,7 @@ def fix_inconsistent_state(self, s: StateId, verbose: bool) -> bool: aps_lanes = self.lookahead_lanes(s) assert aps_lanes != [] self.fix_with_lookahead(s, aps_lanes) + assert not state.is_inconsistent() return True def fix_inconsistent_table(self, verbose: bool, progress: bool) -> None: From 2a8f80d25118077e4cf6595c02af5ff3f91d64bf Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron"
Date: Thu, 6 Aug 2020 18:21:39 +0200 Subject: [PATCH 06/11] Simplify fix_inconsistent_table. This change adds a `mark_sweep_states` function to replace unreachable states by `None`, and prevents them from being reused. This removes the need for tracking inconsistent states which are potentially unreachable, but we have to re-scan the whole parse table in order to refill the todo list in case of missed inconsistencies. --- jsparagus/parse_table.py | 104 +++++++++++++++++++++------------------ 1 file changed, 57 insertions(+), 47 deletions(-) diff --git a/jsparagus/parse_table.py b/jsparagus/parse_table.py index 0d48fd8c3..ce5ddab2c 100644 --- a/jsparagus/parse_table.py +++ b/jsparagus/parse_table.py @@ -709,6 +709,28 @@ def remove_unreachable_states( self.remove_state(s, next_set) maybe_unreachable_set = next_set + def mark_sweep_states(self) -> None: + marked = set() + + def mark(s: StateId) -> None: + marked.add(s) + for _, dest in self.states[s].edges(): + if dest not in marked: + mark(dest) + + for _, idx in self.named_goals: + mark(idx) + + maybe_unreachable_set: OrderedSet[StateId] = OrderedSet() + remove_list = [] + for s in self.states: + if s is not None and s.index not in marked: + remove_list.append(s) + for s in remove_list: + self.clear_edges(s, maybe_unreachable_set) + for s in remove_list: + self.remove_state(s.index, maybe_unreachable_set) + def is_reachable_state(self, s: StateId) -> bool: """Check whether the current state is reachable or not.""" if self.states[s] is None: @@ -1527,54 +1549,41 @@ def fix_inconsistent_table(self, verbose: bool, progress: bool) -> None: def visit_table() -> typing.Iterator[None]: nonlocal count - unreachable = [] while todo: - while todo: - yield # progress bar. - # TODO: Compare stack / queue, for the traversal of the states. - s = todo.popleft() - if not self.is_reachable_state(s): - # NOTE: We do not fix unreachable states, as we might - # not be able to compute the reduce actions. However, - # we should not clean edges not backedges as the state - # might become reachable later on, since states are - # shared if they have the same locations. 
- unreachable.append(s) - continue - assert self.states[s].is_inconsistent() - start_len = len(self.states) - if verbose: - count = count + 1 - print("Fixing state {}\n".format(self.states[s].stable_str(self.states))) - try: - self.fix_inconsistent_state(s, verbose) - except Exception as exc: - self.debug_info = True - raise ValueError( - "Error while fixing conflict in state {}\n\n" - "In the following grammar productions:\n{}" - .format(self.states[s].stable_str(self.states), - self.debug_context(s, "\n", "\t")) - ) from exc - new_inconsistent_states = [ - s.index for s in self.states[start_len:] - if s.is_inconsistent() - ] - if verbose: - print("\tAdding {} states".format(len(self.states[start_len:]))) - print("\tWith {} inconsistent states".format(len(new_inconsistent_states))) - todo.extend(new_inconsistent_states) - - # Check whether none of the previously inconsistent and - # unreahable state became reachable. If so add it back to the - # todo list. - still_unreachable = [] - for s in unreachable: - if self.is_reachable_state(s): - todo.append(s) - else: - still_unreachable.append(s) - unreachable = still_unreachable + yield # progress bar. + # TODO: Compare stack / queue, for the traversal of the states. + s = todo.popleft() + if self.states[s] is None: + continue + assert self.states[s].is_inconsistent() + start_len = len(self.states) + if verbose: + count = count + 1 + print("Fixing state {}\n".format(self.states[s].stable_str(self.states))) + try: + self.fix_inconsistent_state(s, verbose) + except Exception as exc: + self.debug_info = True + raise ValueError( + "Error while fixing conflict in state {}\n\n" + "In the following grammar productions:\n{}" + .format(self.states[s].stable_str(self.states), + self.debug_context(s, "\n", "\t")) + ) from exc + new_inconsistent_states = [ + s.index for s in self.states[start_len:] + if s.is_inconsistent() + ] + if verbose: + print("\tAdding {} states".format(len(self.states[start_len:]))) + print("\tWith {} inconsistent states".format(len(new_inconsistent_states))) + + todo.extend(new_inconsistent_states) + self.mark_sweep_states() + if not todo: + for state in self.states: + if state is not None and state.is_inconsistent(): + todo.append(state.index) consume(visit_table(), progress) if verbose: @@ -1589,6 +1598,7 @@ def visit_table() -> typing.Iterator[None]: if verbose: print("Fix Inconsistent Table Result:") self.debug_dump() + self.debug_info = False def remove_all_unreachable_state(self, verbose: bool, progress: bool) -> None: self.states = [s for s in self.states if s is not None] From 71bd30cba38b5ab4dfff7cf4f458d364ce6c0c3a Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron"
Date: Fri, 7 Aug 2020 17:03:07 +0200 Subject: [PATCH 07/11] Add Shift action to register these as delayed actions in restore_edges. When restore_edges adds a consistent action in front of shifted terminals and non-terminals, we want to record this so that we can annotate the target destination while noting that a shifted token is still missing. 
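
A hedged sketch of the recording step (stand-in classes; the real Shift
action is the one added in the diff below): plain shifted terms are wrapped
so that they can live next to genuine actions in the delayed_actions set
which names a synthesized state.

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Shift:
        term: str  # stand-in for a shifted terminal or nonterminal

    def delayed_markers(edge_terms, is_action):
        # Wrap shifted terms so they can be recorded next to real actions.
        return frozenset(t if is_action(t) else Shift(t) for t in edge_terms)

    print(delayed_markers([";", "}"], is_action=lambda t: False))
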
--- jsparagus/actions.py | 31 +++++++++++++++++++++++++++++++ jsparagus/parse_table.py | 24 +++++++++++++----------- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/jsparagus/actions.py b/jsparagus/actions.py index 456754b39..f4a3dce7b 100644 --- a/jsparagus/actions.py +++ b/jsparagus/actions.py @@ -226,6 +226,37 @@ def stable_str(self, states: typing.Any) -> str: ShiftedAction = typing.Union[Action, bool] +class Shift(Action): + """Shift action is the implicit action performed when a terminal or nonterminal + is used on an edge. However this action is not supported as an epsilon edge, + but only serves to annotate delayed actions of states. """ + __slots__ = ['term'] + + term: ShiftedTerm + + def __init__(self, term: ShiftedTerm): + super().__init__() + self.term = term + + def is_inconsistent(self) -> bool: + return True + + def is_condition(self) -> bool: + return True + + def condition(self) -> Shift: + return self + + def update_stack(self) -> bool: + return True + + def update_stack_with(self) -> StackDiff: + return StackDiff(0, None, -1) + + def __str__(self) -> str: + return "Shift({})".format(str(self.term)) + + class Replay(Action): """Replay a term which was previously saved by the Unwind function. Note that this does not Shift a term given as argument as the replay action should diff --git a/jsparagus/parse_table.py b/jsparagus/parse_table.py index ce5ddab2c..eea29c090 100644 --- a/jsparagus/parse_table.py +++ b/jsparagus/parse_table.py @@ -11,8 +11,8 @@ from . import types from .utils import consume, keep_until, split, default_id_dict, default_fwd_dict from .ordered import OrderedSet, OrderedFrozenSet -from .actions import (Action, Replay, Reduce, Unwind, FilterStates, Seq, - CheckLineTerminator) +from .actions import (Action, Shift, Replay, Reduce, Unwind, FilterStates, Seq, + FunCall, CheckLineTerminator) from .grammar import End, ErrorSymbol, InitNt, Nt from .rewrites import CanonicalGrammar from .lr0 import LR0Generator, Term @@ -626,21 +626,23 @@ def restore_edges( if not self.term_is_shifted(term): # There is no more target after a reduce action. actions_list = [] - for target, actions in actions_list: + for target, remaining_edges in actions_list: assert isinstance(target, StateAndTransitions) locations |= target.locations delayed |= target.delayed_actions - if actions != []: + if remaining_edges != []: # Pull edges, with delayed actions. - edge = actions[0] - assert isinstance(edge, Edge) - for action in actions: - action_term = action.term - assert isinstance(action_term, Action) - delayed.add(action_term) + for edge in remaining_edges: + edge_term = edge.term + assert edge_term is not None + if isinstance(edge_term, Action): + delayed.add(edge_term) + else: + delayed.add(Shift(edge_term)) + edge = remaining_edges[0] edge_term = edge.term assert edge_term is not None - new_shift_map[edge_term].append((target, actions[1:])) + new_shift_map[edge_term].append((target, remaining_edges[1:])) recurse = True else: # Pull edges, as a copy of existing edges. From 1336cececa6300c33196b6894753047ee512e117 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron"
Date: Fri, 7 Aug 2020 17:13:05 +0200 Subject: [PATCH 08/11] Prevent aliasing of Reduce actions. Lowering a Reduce action looks up the state which would be used to shift the non-terminal, and then encodes the Replay action based on the state where we are reducing the nonterminal. 
However, this strategy alone does not work when reduce actions are shared across states: with different lookaheads but the same reduce actions, we generate multiple Replay paths for the same state which shifts the reduced nonterminal. Adding the lookahead terms to the reduce action ensures that we do not add sharing if we did not have sharing in LR0 before. --- jsparagus/actions.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/jsparagus/actions.py b/jsparagus/actions.py index f4a3dce7b..ade19a9dc 100644 --- a/jsparagus/actions.py +++ b/jsparagus/actions.py @@ -322,11 +322,14 @@ def shifted_action(self, shifted_term: Element) -> Unwind: class Reduce(Action): """Prevents the fall-through to the epsilon transition and returns to the shift table execution to resume shifting or replaying terms.""" - __slots__ = ['unwind'] + __slots__ = ['unwind', 'lookahead'] unwind: Unwind - def __init__(self, unwind: Unwind) -> None: + # List of lookahead tokens used to prevent aliasing of reduce states. + lookahead: typing.Tuple[Element, ...] + + def __init__(self, unwind: Unwind, lookahead: typing.Tuple[Element, ...] = ()) -> None: nt_name = unwind.nt.name if isinstance(nt_name, InitNt): name = "Start_" + str(nt_name.goal.name) @@ -334,9 +337,10 @@ def __init__(self, unwind: Unwind) -> None: name = nt_name super().__init__() self.unwind = unwind + self.lookahead = lookahead def __str__(self) -> str: - return "Reduce({})".format(str(self.unwind)) + return "Reduce({}, {})".format(str(self.unwind), str(self.lookahead)) def follow_edge(self) -> bool: return False @@ -349,11 +353,11 @@ def update_stack_with(self) -> StackDiff: def unshift_action(self, num: int) -> Reduce: unwind = self.unwind.unshift_action(num) - return Reduce(unwind) + return Reduce(unwind, lookahead=self.lookahead[:-num]) def shifted_action(self, shifted_term: Element) -> Reduce: unwind = self.unwind.shifted_action(shifted_term) - return Reduce(unwind) + return Reduce(unwind, lookahead=(*self.lookahead, shifted_term)) From 7b2a5658668cebaf4081833887066b4a5f640d40 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron"
Date: Fri, 7 Aug 2020 17:25:08 +0200 Subject: [PATCH 09/11] Add FunCall to ASI to generate an empty value. The Rust backend expects that we have a FunCall which produces a "value", to be wrapped. This modification adds the FunCall as part of the AstBuilder to produce a value. 
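
In spirit, the edge built below behaves like the following toy model
(simplified stand-ins; the real edge is a Seq([FunCall("asi", ()),
Reduce(Unwind(...))]) action emitted through the regular backends):

    class DemoBuilder:
        def asi(self):
            # Placeholder for AstBuilder::asi(): an empty value standing
            # for the automatically inserted semicolon.
            return "Asi()"

    def run_asi_edge(builder, stack):
        value = builder.asi()          # FunCall("asi", ()) sets "value"
        stack.append(("ASI", value))   # reduce a zero-length ASI nonterminal
        return stack

    print(run_asi_edge(DemoBuilder(), stack=[]))
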
--- jsparagus/actions.py | 2 +- jsparagus/parse_table.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/jsparagus/actions.py b/jsparagus/actions.py index ade19a9dc..d73f8f591 100644 --- a/jsparagus/actions.py +++ b/jsparagus/actions.py @@ -623,7 +623,7 @@ def __init__( args: typing.Tuple[OutputExpr, ...], trait: types.Type = types.Type("AstBuilder"), fallible: bool = False, - set_to: str = "val", + set_to: str = "value", offset: int = 0, ) -> None: super().__init__() diff --git a/jsparagus/parse_table.py b/jsparagus/parse_table.py index eea29c090..772e237f6 100644 --- a/jsparagus/parse_table.py +++ b/jsparagus/parse_table.py @@ -1049,21 +1049,22 @@ def expand_javascript_asi(self, verbose: bool, progress: bool) -> None: dead_end = self.get_state(OrderedFrozenSet()) # Add Reduce ErrorSymbol("asi") - asi_nt = Nt(str(ErrorSymbol("asi"))) + asi_nt = Nt("ASI") + asi_term = Seq([FunCall("asi", ()), Reduce(Unwind(asi_nt, 0, 0))]) self.nonterminals.append(asi_nt) reduce_asi: typing.Dict[ShiftedTerm, StateAndTransitions] = {} for t in self.terminals: reduce_asi[t] = self.get_state( - OrderedFrozenSet(['ErrorSymbol("asi") ::= {} ·'.format(str(t)), - 'ErrorSymbol("asi") ::= [LineTerminator here] {} ·'.format(str(t))])) - self.add_edge(reduce_asi[t], Reduce(Unwind(asi_nt, 0, 1)), dead_end.index) + OrderedFrozenSet(['ASI ::= {} ·'.format(str(t)), + 'ASI ::= [LineTerminator here] {} ·'.format(str(t))])) + self.add_edge(reduce_asi[t], asi_term.shifted_action(t), dead_end.index) # Add CheckLineTerminator -> Reduce ErrorSymbol("asi") newline_asi_term = CheckLineTerminator(-1, True) newline_asi: typing.Dict[ShiftedTerm, StateAndTransitions] = {} for t in self.terminals: newline_asi[t] = self.get_state( - OrderedFrozenSet(['ErrorSymbol("asi") ::= [LineTerminator here] {} ·'.format(str(t))]), + OrderedFrozenSet(['ASI ::= [LineTerminator here] {} ·'.format(str(t))]), OrderedFrozenSet([newline_asi_term])) self.add_edge(newline_asi[t], newline_asi_term, reduce_asi[t].index) From 78bd1ff4de927c517e6f4d22178af81b10d2b3b3 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron"
Date: Fri, 7 Aug 2020 17:31:22 +0200 Subject: [PATCH 10/11] Rust backend: Implement CheckLineTerminator conditions Previously, the condition was only checked in order to raise an error at runtime. Now that the errors are handled at compile time, we can remove all code paths handling automatic semicolon insertion as an error case. 
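
The runtime behaviour generated below can be modelled in a few lines of
Python (toy parser and hypothetical names; the real backend emits Rust):

    class ToyParser:
        def __init__(self, on_new_line):
            self._on_new_line = on_new_line

        def is_on_new_line(self):
            return self._on_new_line

        def epsilon(self, state):
            return state

    def check_line_terminator(parser, is_on_new_line, then_state, else_state=None):
        if else_state is None:
            # Single epsilon edge: bail out when the check fails.
            if parser.is_on_new_line() != is_on_new_line:
                return False
            return parser.epsilon(then_state)
        # Two epsilon edges: a full if/else on the same runtime check.
        if parser.is_on_new_line():
            return parser.epsilon(then_state)
        return parser.epsilon(else_state)

    assert check_line_terminator(ToyParser(True), True, then_state=42) == 42
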
--- crates/generated_parser/src/traits/mod.rs | 2 +- crates/parser/src/parser.rs | 53 +++-------------------- crates/parser/src/simulator.rs | 4 +- jsparagus/emit/rust.py | 39 ++++++++++++----- 4 files changed, 37 insertions(+), 61 deletions(-) diff --git a/crates/generated_parser/src/traits/mod.rs b/crates/generated_parser/src/traits/mod.rs index 69e2c7dae..4c2386ec5 100644 --- a/crates/generated_parser/src/traits/mod.rs +++ b/crates/generated_parser/src/traits/mod.rs @@ -32,5 +32,5 @@ pub trait ParserTrait<'alloc, Value> { fn replay(&mut self, tv: TermValue); fn epsilon(&mut self, state: usize); fn top_state(&self) -> usize; - fn check_not_on_new_line(&mut self, peek: usize) -> Result<'alloc, bool>; + fn is_on_new_line(&self) -> Result<'alloc, bool>; } diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs index c9f12a56e..876321266 100644 --- a/crates/parser/src/parser.rs +++ b/crates/parser/src/parser.rs @@ -96,19 +96,13 @@ impl<'alloc> ParserTrait<'alloc, StackValue<'alloc>> for Parser<'alloc> { fn top_state(&self) -> usize { self.state() } - fn check_not_on_new_line(&mut self, peek: usize) -> Result<'alloc, bool> { + fn is_on_new_line(&self) -> Result<'alloc, bool> { let sv = { let stack = self.node_stack.stack_slice(); - &stack[stack.len() - peek].value + &stack[stack.len() - 1].value }; if let StackValue::Token(ref token) = sv { - if !token.is_on_new_line { - return Ok(true); - } - self.rewind(peek - 1); - let tv = self.pop(); - self.try_error_handling(tv)?; - return Ok(false); + return Ok(token.is_on_new_line); } Err(ParseError::NoLineTerminatorHereExpectedToken.into()) } @@ -194,51 +188,14 @@ impl<'alloc> Parser<'alloc> { }), }); if let StackValue::Token(ref token) = t.value { - // Error tokens might them-self cause more errors to be reported. - // This happens due to the fact that the ErrorToken can be replayed, - // and while the ErrorToken might be in the lookahead rules, it - // might not be in the shifted terms coming after the reduced - // nonterminal. - if t.term == TerminalId::ErrorToken.into() { - return Err(Self::parse_error(token).into()); - } - - // Otherwise, check if the current rule accept an Automatic - // Semi-Colon insertion (ASI). - let state = self.state(); - assert!(state < TABLES.shift_count); - let error_code = TABLES.error_codes[state]; - if let Some(error_code) = error_code { - let err_token = (*token).clone(); - Self::recover(token, error_code)?; - self.replay(t); - let err_token = self.handler.alloc(err_token); - self.replay(TermValue { - term: TerminalId::ErrorToken.into(), - value: StackValue::Token(err_token), - }); - return Ok(false); - } // On error, don't attempt error handling again. 
return Err(Self::parse_error(token).into()); } Err(ParseError::ParserCannotUnpackToken.into()) } - pub(crate) fn recover(t: &Token, error_code: ErrorCode) -> Result<'alloc, ()> { - match error_code { - ErrorCode::Asi => { - if t.is_on_new_line - || t.terminal_id == TerminalId::End - || t.terminal_id == TerminalId::CloseBrace - { - Ok(()) - } else { - Err(Self::parse_error(t).into()) - } - } - ErrorCode::DoWhileAsi => Ok(()), - } + pub(crate) fn recover(t: &Token, _error_code: ErrorCode) -> Result<'alloc, ()> { + Err(Self::parse_error(t).into()) } fn simulator<'a>(&'a self) -> Simulator<'alloc, 'a> { diff --git a/crates/parser/src/simulator.rs b/crates/parser/src/simulator.rs index 96773e1d6..4d828149a 100644 --- a/crates/parser/src/simulator.rs +++ b/crates/parser/src/simulator.rs @@ -114,8 +114,8 @@ impl<'alloc, 'parser> ParserTrait<'alloc, ()> for Simulator<'alloc, 'parser> { fn top_state(&self) -> usize { self.state() } - fn check_not_on_new_line(&mut self, _peek: usize) -> Result<'alloc, bool> { - Ok(true) + fn is_on_new_line(&self) -> Result<'alloc, bool> { + Ok(false) } } diff --git a/jsparagus/emit/rust.py b/jsparagus/emit/rust.py index bd60beb94..a1945e816 100644 --- a/jsparagus/emit/rust.py +++ b/jsparagus/emit/rust.py @@ -230,17 +230,36 @@ def write_condition(self, state, first_act): # of new lines, and one which is shifting an extra error token. # This might help remove the overhead of backtracking in addition # to make this backtracking visible through APS. - assert len(list(state.edges())) == 1 - act, dest = next(state.edges()) assert len(self.replay_args) == 0 - assert -act.offset > 0 - assert act.is_on_new_line == False - self.write("// {}", str(act)) - self.write("if !parser.check_not_on_new_line({})? {{", -act.offset) - with indent(self): - self.write("return Ok(false);") - self.write("}") - self.write_epsilon_transition(dest) + assert first_act.offset == -1 + if len(state.epsilon) == 1: + # Generate if-not-then-error code. + act, dest = next(state.edges()) + test = "!" if act.is_on_new_line else "" + self.write("// {}", str(act)) + self.write("if {}parser.is_on_new_line()? {{", test) + with indent(self): + self.write("return Ok(false);") + self.write("}") + self.write_epsilon_transition(dest) + else: + # Generate if-else code + assert len(list(state.edges())) == 2 + edges = state.edges() + true_act, true_dest = next(edges) + if not true_act.is_on_new_line: + false_act, false_dest = true_act, true_dest + true_act, true_dest = next(edges) + else: + false_act, false_dest = next(edges) + self.write("// {} & {}", str(true_act), str(false_act)) + self.write("if parser.is_on_new_line()? {") + with indent(self): + self.write_epsilon_transition(true_dest) + self.write("} else {") + with indent(self): + self.write_epsilon_transition(false_dest) + self.write("}") elif isinstance(first_act, FilterStates): if len(state.epsilon) == 1: # This is an attempt to avoid huge unending compilations. From 89027eae129ba0842ad70e97ea59af29424b8d97 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Fri, 7 Aug 2020 17:33:52 +0200 Subject: [PATCH 11/11] Add AstBuilder asi function for the Rust backend. 
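
For reference, the equivalent hook on the Python side would be a one-liner;
this is a sketch only, since the patch below touches the Rust AstBuilder:

    class AstBuilder:
        def asi(self):
            # Automatic Semicolon Insertion: return an empty placeholder
            # node standing for the inserted semicolon.
            return {"type": "Asi"}
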
--- crates/ast/ast.json | 3 +++ crates/ast/src/source_location.rs | 4 +++- crates/generated_parser/src/ast_builder.rs | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/crates/ast/ast.json b/crates/ast/ast.json index e02d91648..14157f505 100644 --- a/crates/ast/ast.json +++ b/crates/ast/ast.json @@ -2,6 +2,9 @@ "Void": { "_type": "enum" }, + "Asi": { + "_type": "struct" + }, "Argument": { "_type": "enum", "SpreadElement": "Box", diff --git a/crates/ast/src/source_location.rs b/crates/ast/src/source_location.rs index 2219fa451..78fdb2e23 100644 --- a/crates/ast/src/source_location.rs +++ b/crates/ast/src/source_location.rs @@ -25,8 +25,10 @@ impl SourceLocation { self.start = start.start; self.end = end.end; } +} - pub fn default() -> Self { +impl Default for SourceLocation { + fn default() -> Self { Self { start: 0, end: 0 } } } diff --git a/crates/generated_parser/src/ast_builder.rs b/crates/generated_parser/src/ast_builder.rs index ca134da86..5d8216516 100644 --- a/crates/generated_parser/src/ast_builder.rs +++ b/crates/generated_parser/src/ast_builder.rs @@ -84,6 +84,11 @@ impl<'alloc> AstBuilder<'alloc> { list.append(elements); } + // Automatic Semicolon Insertion + pub fn asi(&self) -> arena::Box<'alloc, Asi> { + self.alloc_with(|| Asi::default()) + } + // IdentifierReference : Identifier pub fn identifier_reference( &self,
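
Taken together, the series makes the following decision procedure explicit
in the parse table. A compact executable restatement of ECMAScript 11.9.1
(simplified and standalone; it ignores the do-while case, which always
inserts the semicolon):

    END = "<END>"  # stand-in for the End-of-input terminal

    def inserts_semicolon(token, accepted_tokens, on_new_line, restricted):
        if token in accepted_tokens:
            # Allowed token: ASI only applies when a restricted production
            # (`[no LineTerminator here]`) is violated by a newline.
            return restricted and on_new_line
        # Offending token: `}` and end-of-input always allow insertion,
        # anything else only after a LineTerminator.
        return token in ("}", END) or on_new_line

    assert inserts_semicolon("}", {";"}, on_new_line=False, restricted=False)
    assert not inserts_semicolon("foo", {";"}, on_new_line=False, restricted=False)
    assert inserts_semicolon("foo", {";"}, on_new_line=True, restricted=False)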