From f6e4594f988cb782528af721bb269547fe44d5c1 Mon Sep 17 00:00:00 2001 From: memento Date: Mon, 5 Dec 2022 12:21:55 -0600 Subject: [PATCH 1/4] (fix) Proposing to collect empty separation lines with comments --- refactor/ast.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++- tests/test_ast.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 2 deletions(-) diff --git a/refactor/ast.py b/refactor/ast.py index 3d23262..79f46db 100644 --- a/refactor/ast.py +++ b/refactor/ast.py @@ -255,5 +255,57 @@ def retrieve_segment(self, node: ast.AST, segment: str) -> None: self.fill() self.write(segment) +class PreciseEmptyLinesUnparser(PreciseUnparser): + """A more precise version of the default unparser, + with various improvements such as comment handling + for major statements and child node recovery.""" + + @contextmanager + def _collect_stmt_comments(self, node: ast.AST) -> Iterator[None]: + def _write_if_unseen_comment( + line_no: int, + line: str, + comment_begin: int, + ) -> None: + if line_no in self._visited_comment_lines: + # We have already written this comment as the + # end of another node. No need to re-write it. + return + + self.fill() + self.write(line[comment_begin:]) + self._visited_comment_lines.add(line_no) + + assert self.source is not None + lines = self.source.splitlines() + node_start, node_end = node.lineno - 1, cast(int, node.end_lineno) + + # Collect comments in the reverse order, so we can properly + # identify the end of the current comment block. + preceding_comments = [] + for offset, line in enumerate(reversed(lines[:node_start])): + comment_begin = line.find("#") + if (line or line.isspace()) and (comment_begin == -1 or comment_begin != node.col_offset): + break + + preceding_comments.append((node_start - offset, line, node.col_offset)) + + for comment_info in reversed(preceding_comments): + _write_if_unseen_comment(*comment_info) + + yield + + for offset, line in enumerate(lines[node_end:], 1): + comment_begin = line.find("#") + if (line or line.isspace()) and (comment_begin == -1 or comment_begin != node.col_offset): + break + + _write_if_unseen_comment( + line_no=node_end + offset, + line=line, + comment_begin=node.col_offset, + ) + + -UNPARSER_BACKENDS = {"fast": BaseUnparser, "precise": PreciseUnparser} +UNPARSER_BACKENDS = {"fast": BaseUnparser, "precise": PreciseUnparser, "precise_with_empty_lines": PreciseEmptyLinesUnparser} diff --git a/tests/test_ast.py b/tests/test_ast.py index 1e5dc1d..2463a4c 100644 --- a/tests/test_ast.py +++ b/tests/test_ast.py @@ -7,7 +7,7 @@ import pytest from refactor import common -from refactor.ast import BaseUnparser, PreciseUnparser, split_lines +from refactor.ast import BaseUnparser, PreciseUnparser, split_lines, PreciseEmptyLinesUnparser def test_split_lines(): @@ -240,3 +240,55 @@ def foo(): base = PreciseUnparser(source=source) assert base.unparse(tree) + "\n" == expected_src + +def test_precise_unparser_comments_empty_lines(): + source = textwrap.dedent( + """\ + def foo(): + # unindented comment + # indented but not connected comment + + # a + # a1 + print() + # a2 + print() + # b + + # b2 + print( + c # e + ) + # c + print(d) + # final comment + """ + ) + + expected_src = textwrap.dedent( + """\ + def foo(): + # indented but not connected comment + + # a + # a1 + print() + # a2 + print() + # b + + # b2 + print( + c # e + ) + # c + """ + ) + + tree = ast.parse(source) + + # # Remove the print(d) + tree.body[0].body.pop() + + base = PreciseEmptyLinesUnparser(source=source) + assert base.unparse(tree) + "\n" == expected_src From 3e6a3d9b8d0bfa43ab7cd239e24428fc580a7cc2 Mon Sep 17 00:00:00 2001 From: memento Date: Tue, 6 Dec 2022 09:00:49 -0600 Subject: [PATCH 2/4] (fix) Proposing to collect empty separation lines with comments --- refactor/ast.py | 4 +- tests/test_ast.py | 123 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 93 insertions(+), 34 deletions(-) diff --git a/refactor/ast.py b/refactor/ast.py index 79f46db..3f7aeb6 100644 --- a/refactor/ast.py +++ b/refactor/ast.py @@ -285,7 +285,7 @@ def _write_if_unseen_comment( preceding_comments = [] for offset, line in enumerate(reversed(lines[:node_start])): comment_begin = line.find("#") - if (line or line.isspace()) and (comment_begin == -1 or comment_begin != node.col_offset): + if (line and not line.isspace()) and (comment_begin == -1 or comment_begin != node.col_offset): break preceding_comments.append((node_start - offset, line, node.col_offset)) @@ -297,7 +297,7 @@ def _write_if_unseen_comment( for offset, line in enumerate(lines[node_end:], 1): comment_begin = line.find("#") - if (line or line.isspace()) and (comment_begin == -1 or comment_begin != node.col_offset): + if (line and not line.isspace()) and (comment_begin == -1 or comment_begin != node.col_offset): break _write_if_unseen_comment( diff --git a/tests/test_ast.py b/tests/test_ast.py index 2463a4c..a99871d 100644 --- a/tests/test_ast.py +++ b/tests/test_ast.py @@ -241,50 +241,109 @@ def foo(): base = PreciseUnparser(source=source) assert base.unparse(tree) + "\n" == expected_src -def test_precise_unparser_comments_empty_lines(): + + +def test_precise_empty_lines_unparser(): source = textwrap.dedent( """\ - def foo(): - # unindented comment - # indented but not connected comment + def func(): + if something: + print( + call(.1), + maybe+something_else, + maybe / other, + thing . a + ) + """ + ) - # a - # a1 - print() - # a2 - print() - # b + expected_src = textwrap.dedent( + """\ + def func(): + if something: + print(call(.1), maybe+something_else, maybe / other, thing . a, 3) + """ + ) - # b2 - print( - c # e - ) - # c - print(d) - # final comment + tree = ast.parse(source) + tree.body[0].body[0].body[0].value.args.append(ast.Constant(3)) + + base = PreciseEmptyLinesUnparser(source=source) + assert base.unparse(tree) + "\n" == expected_src + + +def test_precise_empty_lines_unparser_indented_literals(): + source = textwrap.dedent( + """\ + def func(): + if something: + print( + "bleh" + "zoom" + ) """ ) expected_src = textwrap.dedent( """\ - def foo(): - # indented but not connected comment - - # a - # a1 - print() - # a2 - print() - # b - - # b2 - print( - c # e - ) - # c + def func(): + if something: + print("bleh" + "zoom", 3) """ ) + tree = ast.parse(source) + tree.body[0].body[0].body[0].value.args.append(ast.Constant(3)) + + base = PreciseEmptyLinesUnparser(source=source) + assert base.unparse(tree) + "\n" == expected_src + +def test_precise_empty_lines_unparser_comments(): + #source = textwrap.dedent( + source = ( + """\ +def foo(): +# unindented comment + # indented but not connected comment + + # a + # a1 + print() + # a2 + print() + # b + + # b2 + print( + c # e + ) + # c + print(d) + # final comment +""" + ) + + expected_src = ( + """\ +def foo(): + # indented but not connected comment + + # a + # a1 + print() + # a2 + print() + # b + + # b2 + print( + c # e + ) + # c +""" + ) + tree = ast.parse(source) # # Remove the print(d) From 888d24ecc6ea59a90ffc4cae76e49d9e3b519c26 Mon Sep 17 00:00:00 2001 From: memento Date: Sat, 10 Dec 2022 11:20:07 -0600 Subject: [PATCH 3/4] (fix) Duplicated empty lines --- refactor/ast.py | 2 +- tests/test_ast.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/refactor/ast.py b/refactor/ast.py index 3f7aeb6..77cbfdc 100644 --- a/refactor/ast.py +++ b/refactor/ast.py @@ -297,7 +297,7 @@ def _write_if_unseen_comment( for offset, line in enumerate(lines[node_end:], 1): comment_begin = line.find("#") - if (line and not line.isspace()) and (comment_begin == -1 or comment_begin != node.col_offset): + if comment_begin == -1 or comment_begin != node.col_offset: break _write_if_unseen_comment( diff --git a/tests/test_ast.py b/tests/test_ast.py index a99871d..fe64a0f 100644 --- a/tests/test_ast.py +++ b/tests/test_ast.py @@ -300,8 +300,7 @@ def func(): assert base.unparse(tree) + "\n" == expected_src def test_precise_empty_lines_unparser_comments(): - #source = textwrap.dedent( - source = ( + source = textwrap.dedent( """\ def foo(): # unindented comment From 8e9d33188044fd354a847f9d92872b35c4ba62a6 Mon Sep 17 00:00:00 2001 From: memento Date: Mon, 26 Dec 2022 10:20:37 -0600 Subject: [PATCH 4/4] Addressing the feedback from review --- refactor/ast.py | 63 ++++++----------------------------------------- tests/test_ast.py | 12 ++++----- 2 files changed, 13 insertions(+), 62 deletions(-) diff --git a/refactor/ast.py b/refactor/ast.py index 77cbfdc..d1a5c4f 100644 --- a/refactor/ast.py +++ b/refactor/ast.py @@ -139,6 +139,7 @@ class PreciseUnparser(BaseUnparser): for major statements and child node recovery.""" def __init__(self, *args: Any, **kwargs: Any) -> None: + self.empty_lines = kwargs.pop("empty_lines", None) self._visited_comment_lines: set[int] = set() super().__init__(*args, **kwargs) @@ -222,10 +223,12 @@ def _write_if_unseen_comment( preceding_comments = [] for offset, line in enumerate(reversed(lines[:node_start])): comment_begin = line.find("#") - if comment_begin == -1 or comment_begin != node.col_offset: + not_valid_comment: bool = comment_begin == -1 or comment_begin != node.col_offset + no_empty_lines: bool = not self.empty_lines or line and not line.isspace() + if no_empty_lines and not_valid_comment: break - preceding_comments.append((node_start - offset, line, comment_begin)) + preceding_comments.append((node_start - offset, line, node.col_offset)) for comment_info in reversed(preceding_comments): _write_if_unseen_comment(*comment_info) @@ -240,7 +243,7 @@ def _write_if_unseen_comment( _write_if_unseen_comment( line_no=node_end + offset, line=line, - comment_begin=comment_begin, + comment_begin=node.col_offset, ) def collect_comments(self, node: ast.AST) -> ContextManager[None]: @@ -255,57 +258,5 @@ def retrieve_segment(self, node: ast.AST, segment: str) -> None: self.fill() self.write(segment) -class PreciseEmptyLinesUnparser(PreciseUnparser): - """A more precise version of the default unparser, - with various improvements such as comment handling - for major statements and child node recovery.""" - - @contextmanager - def _collect_stmt_comments(self, node: ast.AST) -> Iterator[None]: - def _write_if_unseen_comment( - line_no: int, - line: str, - comment_begin: int, - ) -> None: - if line_no in self._visited_comment_lines: - # We have already written this comment as the - # end of another node. No need to re-write it. - return - - self.fill() - self.write(line[comment_begin:]) - self._visited_comment_lines.add(line_no) - - assert self.source is not None - lines = self.source.splitlines() - node_start, node_end = node.lineno - 1, cast(int, node.end_lineno) - - # Collect comments in the reverse order, so we can properly - # identify the end of the current comment block. - preceding_comments = [] - for offset, line in enumerate(reversed(lines[:node_start])): - comment_begin = line.find("#") - if (line and not line.isspace()) and (comment_begin == -1 or comment_begin != node.col_offset): - break - - preceding_comments.append((node_start - offset, line, node.col_offset)) - - for comment_info in reversed(preceding_comments): - _write_if_unseen_comment(*comment_info) - - yield - - for offset, line in enumerate(lines[node_end:], 1): - comment_begin = line.find("#") - if comment_begin == -1 or comment_begin != node.col_offset: - break - - _write_if_unseen_comment( - line_no=node_end + offset, - line=line, - comment_begin=node.col_offset, - ) - - -UNPARSER_BACKENDS = {"fast": BaseUnparser, "precise": PreciseUnparser, "precise_with_empty_lines": PreciseEmptyLinesUnparser} +UNPARSER_BACKENDS = {"fast": BaseUnparser, "precise": PreciseUnparser} diff --git a/tests/test_ast.py b/tests/test_ast.py index fe64a0f..b0eddf7 100644 --- a/tests/test_ast.py +++ b/tests/test_ast.py @@ -7,7 +7,7 @@ import pytest from refactor import common -from refactor.ast import BaseUnparser, PreciseUnparser, split_lines, PreciseEmptyLinesUnparser +from refactor.ast import BaseUnparser, PreciseUnparser, split_lines def test_split_lines(): @@ -69,7 +69,7 @@ def test_split_lines_with_encoding(case): else: start_line = lines[lineno][col_offset:] end_line = lines[end_lineno][:end_col_offset] - match = start_line + lines[lineno + 1 : end_lineno].join() + end_line + match = start_line + lines[lineno + 1: end_lineno].join() + end_line assert str(match) == ast.get_source_segment(case, node) @@ -242,7 +242,6 @@ def foo(): assert base.unparse(tree) + "\n" == expected_src - def test_precise_empty_lines_unparser(): source = textwrap.dedent( """\ @@ -268,7 +267,7 @@ def func(): tree = ast.parse(source) tree.body[0].body[0].body[0].value.args.append(ast.Constant(3)) - base = PreciseEmptyLinesUnparser(source=source) + base = PreciseUnparser(source=source, empty_lines=True) assert base.unparse(tree) + "\n" == expected_src @@ -296,9 +295,10 @@ def func(): tree = ast.parse(source) tree.body[0].body[0].body[0].value.args.append(ast.Constant(3)) - base = PreciseEmptyLinesUnparser(source=source) + base = PreciseUnparser(source=source, empty_lines=True) assert base.unparse(tree) + "\n" == expected_src + def test_precise_empty_lines_unparser_comments(): source = textwrap.dedent( """\ @@ -348,5 +348,5 @@ def foo(): # # Remove the print(d) tree.body[0].body.pop() - base = PreciseEmptyLinesUnparser(source=source) + base = PreciseUnparser(source=source, empty_lines=True) assert base.unparse(tree) + "\n" == expected_src