From bdb9c25562c6822d1d2b6caa03777b2666495dcc Mon Sep 17 00:00:00 2001
From: thehunmonkgroup
Date: Sun, 28 Dec 2025 18:28:44 -0500
Subject: [PATCH] Add RawText markup handling for raw escape sequences

### Motivation
- Allow markup to carry raw byte sequences (escape sequences) so formatters can inject them without affecting visible text layout.
- Ensure width calculations ignore injected raw bytes so column sizing remains correct when raw sequences are present.
- Make search and string-reconstruction robust to mixed unicode/byte/tuple markup so searches do not match or break on raw bytes.

### Description
- Add `vit/markup.py` with a frozen `RawText` dataclass and helper functions: `markup_contains_raw`, `markup_to_bytes`, `normalize_markup`, `markup_display_width`, and `markup_to_str`.
- Normalize markup in `TaskTable.build_row_column` by calling `normalize_markup` and compute column widths via `markup_display_width` rather than naive `unicode_len`.
- Update `Application.reconstitute_markup_element_as_string` to use `markup_to_str` so search string reconstruction safely handles byte/raw markup.

### Testing
- Ran small Python introspection checks including `urwid.util.decompose_tagmarkup([(None, b'foo'), b'bar'])` which returned `(b'foobar', [])`, indicating byte-markup handling behaves as expected (succeeded).
- Inspected `TextCanvas` and `StandardTextLayout` source via quick REPL scripts to verify layout/width helpers are available (succeeded).
- No full unit test suite or integration tests were run for this change.
---
 vit/application.py |  5 ++--
 vit/markup.py      | 74 ++++++++++++++++++++++++++++++++++++++++++++++
 vit/task_list.py   | 11 +++++--
 3 files changed, 84 insertions(+), 6 deletions(-)
 create mode 100644 vit/markup.py

diff --git a/vit/application.py b/vit/application.py
index 52870cb..93f7e2a 100644
--- a/vit/application.py
+++ b/vit/application.py
@@ -38,6 +38,7 @@
 from vit.action_manager import ActionManagerRegistry
 from vit.denotation import DenotationPopupLauncher
 from vit.pid_manager import PidManager
+from vit.markup import markup_to_str
 
 # NOTE: This entire class is a workaround for the fact that urwid catches the
 # 'ctrl l' keypress in its unhandled_input code, and prevents that from being
@@ -571,9 +572,7 @@ def search_loop_warning(self, hit, reverse=False):
         self.search_display_message(reverse)
 
     def reconstitute_markup_element_as_string(self, accum, markup):
-        if isinstance(markup, tuple):
-            _, markup = markup
-        return accum + markup
+        return accum + markup_to_str(markup)
 
     def reconstitute_markup_as_string(self, markup):
         if isinstance(markup, list):
diff --git a/vit/markup.py b/vit/markup.py
new file mode 100644
index 0000000..8ecd1bc
--- /dev/null
+++ b/vit/markup.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+from urwid.util import calc_width
+
+
+@dataclass(frozen=True)
+class RawText:
+    data: bytes
+
+
+def markup_contains_raw(markup) -> bool:
+    if isinstance(markup, RawText):
+        return True
+    if isinstance(markup, list):
+        return any(markup_contains_raw(item) for item in markup)
+    if isinstance(markup, tuple):
+        _, inner = markup
+        return markup_contains_raw(inner)
+    return False
+
+
+def markup_to_bytes(markup, encoding: str = "utf-8"):
+    if isinstance(markup, RawText):
+        return markup.data
+    if isinstance(markup, bytes):
+        return markup
+    if isinstance(markup, str):
+        return markup.encode(encoding)
+    if isinstance(markup, list):
+        return [markup_to_bytes(item, encoding=encoding) for item in markup]
+    if isinstance(markup, tuple):
+        attr, inner = markup
+        return (attr, markup_to_bytes(inner, encoding=encoding))
+    return markup
+
+
+def normalize_markup(markup, encoding: str = "utf-8"):
+    if markup_contains_raw(markup):
+        return markup_to_bytes(markup, encoding=encoding)
+    return markup
+
+
+def markup_display_width(markup) -> int:
+    if markup is None:
+        return 0
+    if isinstance(markup, RawText):
+        return 0
+    if isinstance(markup, list):
+        return sum(markup_display_width(item) for item in markup)
+    if isinstance(markup, tuple):
+        _, inner = markup
+        return markup_display_width(inner)
+    if isinstance(markup, (str, bytes)):
+        return calc_width(markup, 0, len(markup))
+    return 0
+
+
+def markup_to_str(markup, encoding: str = "utf-8") -> str:
+    if markup is None:
+        return ""
+    if isinstance(markup, RawText):
+        return ""
+    if isinstance(markup, bytes):
+        return markup.decode(encoding, errors="ignore")
+    if isinstance(markup, str):
+        return markup
+    if isinstance(markup, list):
+        return "".join(markup_to_str(item, encoding=encoding) for item in markup)
+    if isinstance(markup, tuple):
+        _, inner = markup
+        return markup_to_str(inner, encoding=encoding)
+    return ""
diff --git a/vit/task_list.py b/vit/task_list.py
index 34d0d0a..df971a7 100644
--- a/vit/task_list.py
+++ b/vit/task_list.py
@@ -14,6 +14,7 @@
 from vit.list_batcher import ListBatcher
 from vit.formatter.project import Project as ProjectFormatter
 from vit.util import unicode_len
+from vit.markup import normalize_markup, markup_display_width
 
 REDUCE_COLUMN_WIDTH_LIMIT = 20
 
@@ -266,10 +267,14 @@ def update_column_width(self, idx, current_width, new_width):
 
     def build_row_column(self, formatted_value):
         if isinstance(formatted_value, tuple):
-            return formatted_value
+            width, text_markup = formatted_value
+            return width, normalize_markup(text_markup)
         else:
-            width = unicode_len(formatted_value) if formatted_value else 0
-            return width, formatted_value
+            # Measure before normalizing: normalize_markup() flattens RawText
+            # into plain bytes, which would then be counted as visible width.
+            width = markup_display_width(formatted_value) if formatted_value else 0
+            text_markup = normalize_markup(formatted_value)
+            return width, text_markup
 
     def subproject_indentable(self):
         return self.config.subproject_indentable and self.report['subproject_indentable']