From 98fa7574b3123004649e237bd1f24d68279c03d7 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Mon, 9 Feb 2026 18:31:38 -0500 Subject: [PATCH 1/2] more mud scanning improvements (#121) - Many terminals have custom palettes for colors 0-16 and that's great, but, when connecting to a remote machine we must assume the "usual" colors intended by the remote author, "solarized" and similar color schemes especially screw up artwork on bbs's, so we fix this by remapping those to their exact RGB color, new arguments: - telnetlib3-client --colormatch --color-brightness --color-contrast --background-color and --reverse-video. - support COM-PORT-OPTION, should help with bbs fingerprinting, many of them use serial bridges for their crappy old DOS programs. - better support misbehaving LINEMODE negotiation and IAC breakdown/freakouts of remote servers (caused usually by using wrong encoding). - bugfix LINEMODE FORWARDMASK and TTYPE IS exchange in client. - Experimental new "color palette", to help connect to legacy BBS's, it interprets the colors 0-16 to the likely author-intended* vga-style color codes. 
Many terminals have "custom" palettes, and that's great, but remote artwork assumes the colors intended by its author. - new ``--encoding=petscii`` and ``--encoding=atarist`` --- docs/api/color_filter.rst | 7 + docs/conf.py | 3 +- docs/history.rst | 36 ++ pyproject.toml | 3 + telnetlib3/__init__.py | 2 + telnetlib3/client.py | 99 +++- telnetlib3/client_base.py | 3 +- telnetlib3/client_shell.py | 13 + telnetlib3/color_filter.py | 431 ++++++++++++++++ telnetlib3/encodings/__init__.py | 48 ++ telnetlib3/encodings/atarist.py | 328 +++++++++++++ telnetlib3/encodings/petscii.py | 359 ++++++++++++++ telnetlib3/fingerprinting.py | 29 +- telnetlib3/mud.py | 81 ++- telnetlib3/server_fingerprinting.py | 144 +++++- telnetlib3/server_shell.py | 93 ++-- telnetlib3/stream_writer.py | 312 +++++++++++- telnetlib3/tests/test_color_filter.py | 460 ++++++++++++++++++ telnetlib3/tests/test_core.py | 3 + telnetlib3/tests/test_fingerprinting.py | 14 + telnetlib3/tests/test_mud.py | 95 ++++ telnetlib3/tests/test_mud_negotiation.py | 192 +++++++- .../tests/test_server_fingerprinting.py | 263 +++++++++- telnetlib3/tests/test_server_shell_unit.py | 38 -- telnetlib3/tests/test_stream_writer_extra.py | 48 +- telnetlib3/tests/test_stream_writer_full.py | 110 ++++- telnetlib3/tests/test_writer.py | 15 +- 27 files changed, 3063 insertions(+), 166 deletions(-) create mode 100644 docs/api/color_filter.rst create mode 100644 telnetlib3/color_filter.py create mode 100644 telnetlib3/encodings/__init__.py create mode 100644 telnetlib3/encodings/atarist.py create mode 100644 telnetlib3/encodings/petscii.py create mode 100644 telnetlib3/tests/test_color_filter.py diff --git a/docs/api/color_filter.rst b/docs/api/color_filter.rst new file mode 100644 index 00000000..79df2de3 --- /dev/null +++ b/docs/api/color_filter.rst @@ -0,0 +1,7 @@ +color_filter +------------ + +.. 
automodule:: telnetlib3.color_filter + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index e0e2f610..c7698f55 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,7 +60,8 @@ # General information about the project. project = "telnetlib3" -copyright = "2013 Jeff Quast" +import datetime +copyright = f"2013-{datetime.datetime.now().year} Jeff Quast" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/docs/history.rst b/docs/history.rst index 369d1c92..a4b62a20 100644 --- a/docs/history.rst +++ b/docs/history.rst @@ -1,5 +1,41 @@ History ======= +2.4.0 *unreleased* + * new: :mod:`telnetlib3.color_filter` module — translates 16-color ANSI SGR + codes to 24-bit RGB from hardware palettes (EGA, CGA, VGA, Amiga, xterm). + Enabled by default. New client CLI options: ``--colormatch``, + ``--color-brightness``, ``--color-contrast``, ``--background-color``, + ``--reverse-video``. + * new: :func:`~telnetlib3.mud.zmp_decode`, + :func:`~telnetlib3.mud.atcp_decode`, and + :func:`~telnetlib3.mud.aardwolf_decode` decode functions for ZMP (option + 93), ATCP (option 200), and Aardwolf (option 102) MUD protocols. + * new: :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_zmp`, + :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_atcp`, + :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_aardwolf`, + :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_msp`, and + :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_mxp` callbacks for + receiving MUD extended protocol subnegotiations, with accumulated data + stored in ``zmp_data``, ``atcp_data``, and ``aardwolf_data`` attributes. + * new: COM-PORT-OPTION (:rfc:`2217`) subnegotiation parsing with + ``comport_data`` attribute and + :meth:`~telnetlib3.stream_writer.TelnetWriter.request_comport_signature`. 
+ * enhancement: ``telnetlib3-fingerprint`` now always probes extended MUD + options (MSP, MXP, ZMP, AARDWOLF, ATCP) during server scans and captures + ZMP, ATCP, Aardwolf, MXP, and COM-PORT data in session output. + * enhancement: ``telnetlib3-fingerprint`` smart prompt detection — + auto-answers yes/no, color, UTF-8 menu, ``who``, and ``help`` prompts. + * enhancement: ``--banner-max-bytes`` option for ``telnetlib3-fingerprint``; + default raised from 1024 to 65536. + * enhancement: new ``--encoding=petscii`` and ``--encoding=atarist`` + * bugfix: rare LINEMODE ACK loop with misbehaving servers that re-send + unchanged MODE without ACK. + * bugfix: unknown IAC commands no longer raise ``ValueError``; treated as + data. + * bugfix: client no longer asserts on ``TTYPE IS`` from server. + * bugfix: ``request_forwardmask()`` only called on server side. + * change: ``wcwidth`` is now a required dependency. + 2.3.0 * bugfix: repeat "socket.send() raised exception." exceptions diff --git a/pyproject.toml b/pyproject.toml index 124f2bed..963ddcb4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,6 +43,9 @@ classifiers = [ "Topic :: Terminals :: Telnet", ] requires-python = ">=3.9" +dependencies = [ + "wcwidth>=0.2.13", +] [project.optional-dependencies] docs = [ diff --git a/telnetlib3/__init__.py b/telnetlib3/__init__.py index 4b530fd2..24183b8c 100644 --- a/telnetlib3/__init__.py +++ b/telnetlib3/__init__.py @@ -16,6 +16,7 @@ from . import stream_reader from . import client_base from . import client_shell +from . import color_filter from . import client from . import telopt from . import mud @@ -26,6 +27,7 @@ from . import server_fingerprinting if sys.platform != "win32": from . import fingerprinting_display # noqa: F401 +from . import encodings # noqa: F401 - registers custom codecs (petscii, atarist) from . 
import sync from .server_base import * # noqa from .server import * # noqa diff --git a/telnetlib3/client.py b/telnetlib3/client.py index cd574667..4c2a3e06 100755 --- a/telnetlib3/client.py +++ b/telnetlib3/client.py @@ -528,11 +528,48 @@ def _patched_connection_made(transport: asyncio.BaseTransport) -> None: client_factory = _client_factory + # Wrap the shell callback to inject color filter when enabled + colormatch: str = args["colormatch"] + shell_callback = args["shell"] + if colormatch.lower() != "none": + # local + from .color_filter import ( # pylint: disable=import-outside-toplevel + PALETTES, + ColorConfig, + ColorFilter, + ) + + if colormatch not in PALETTES: + print( + f"Unknown palette {colormatch!r}," f" available: {', '.join(sorted(PALETTES))}", + file=sys.stderr, + ) + sys.exit(1) + color_config = ColorConfig( + palette_name=colormatch, + brightness=args["color_brightness"], + contrast=args["color_contrast"], + background_color=args["background_color"], + reverse_video=args["reverse_video"], + ) + color_filter = ColorFilter(color_config) + original_shell = shell_callback + + async def _color_shell( + reader: Union[TelnetReader, TelnetReaderUnicode], + writer_arg: Union[TelnetWriter, TelnetWriterUnicode], + ) -> None: + # pylint: disable-next=protected-access + writer_arg._color_filter = color_filter # type: ignore[union-attr] + await original_shell(reader, writer_arg) + + shell_callback = _color_shell + # Build connection kwargs explicitly to avoid pylint false positive connection_kwargs: Dict[str, Any] = { "encoding": args["encoding"], "tspeed": args["tspeed"], - "shell": args["shell"], + "shell": shell_callback, "term": args["term"], "force_binary": args["force_binary"], "encoding_errors": args["encoding_errors"], @@ -607,6 +644,43 @@ def _get_argument_parser() -> argparse.ArgumentParser: metavar="OPT", help="always send DO for this option (name like GMCP or number, repeatable)", ) + parser.add_argument( + "--colormatch", + default="ega", + 
metavar="PALETTE", + help=( + "translate basic 16-color ANSI codes to exact 24-bit RGB values" + " from a named hardware palette, bypassing the terminal's custom" + " palette to preserve intended MUD/BBS artwork colors" + " (ega, cga, vga, amiga, xterm, none)" + ), + ) + parser.add_argument( + "--color-brightness", + default=0.9, + type=float, + metavar="FLOAT", + help="color brightness scale [0.0..1.0], where 1.0 is original", + ) + parser.add_argument( + "--color-contrast", + default=0.8, + type=float, + metavar="FLOAT", + help="color contrast scale [0.0..1.0], where 1.0 is original", + ) + parser.add_argument( + "--background-color", + default="#101010", + metavar="#RRGGBB", + help="forced background color as hex RGB (near-black by default)", + ) + parser.add_argument( + "--reverse-video", + action="store_true", + default=False, + help="swap foreground/background for light-background terminals", + ) return parser @@ -627,6 +701,20 @@ def _parse_option_arg(value: str) -> bytes: return bytes([int(value)]) +def _parse_background_color(value: str) -> Tuple[int, int, int]: + """ + Parse hex color string to RGB tuple. + + :param value: Color string like ``"#RRGGBB"`` or ``"RRGGBB"``. + :returns: (R, G, B) tuple with values 0-255. + :raises ValueError: When *value* is not a valid hex color. 
+ """ + h = value.lstrip("#") + if len(h) != 6: + raise ValueError(f"invalid hex color: {value!r}") + return (int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)) + + def _transform_args(args: argparse.Namespace) -> Dict[str, Any]: return { "host": args.host, @@ -645,6 +733,11 @@ def _transform_args(args: argparse.Namespace) -> Dict[str, Any]: "send_environ": tuple(v.strip() for v in args.send_environ.split(",") if v.strip()), "always_will": {_parse_option_arg(v) for v in args.always_will}, "always_do": {_parse_option_arg(v) for v in args.always_do}, + "colormatch": args.colormatch, + "color_brightness": args.color_brightness, + "color_contrast": args.color_contrast, + "background_color": _parse_background_color(args.background_color), + "reverse_video": args.reverse_video, } @@ -741,6 +834,9 @@ def _get_fingerprint_argument_parser() -> argparse.ArgumentParser: parser.add_argument( "--banner-max-wait", default=8.0, type=float, help="max seconds to wait for banner data" ) + parser.add_argument( + "--banner-max-bytes", default=65536, type=int, help="max bytes per banner read call" + ) return parser @@ -778,6 +874,7 @@ async def run_fingerprint_client() -> None: mssp_wait=args.mssp_wait, banner_quiet_time=args.banner_quiet_time, banner_max_wait=args.banner_max_wait, + banner_max_bytes=args.banner_max_bytes, ) # Parse --always-will/--always-do option names/numbers diff --git a/telnetlib3/client_base.py b/telnetlib3/client_base.py index d62e6760..d28dd813 100644 --- a/telnetlib3/client_base.py +++ b/telnetlib3/client_base.py @@ -123,7 +123,7 @@ def connection_lost(self, exc: Optional[Exception]) -> None: # the StreamReader will receive eof. self._waiter_connected.set_result(None) - if self.shell is None: + if self.shell is None and not self.waiter_closed.done(): # when a shell is defined, we allow the completion of the coroutine # to set the result of waiter_closed. 
self.waiter_closed.set_result(weakref.proxy(self)) @@ -200,6 +200,7 @@ def begin_shell(self, future: asyncio.Future[None]) -> None: lambda fut_obj: ( self.waiter_closed.set_result(weakref.proxy(self)) if self.waiter_closed is not None + and not self.waiter_closed.done() else None ) ) diff --git a/telnetlib3/client_shell.py b/telnetlib3/client_shell.py index 39532fe8..f63516a0 100644 --- a/telnetlib3/client_shell.py +++ b/telnetlib3/client_shell.py @@ -242,6 +242,11 @@ def _on_winch() -> None: if telnet_task in wait_for: telnet_task.cancel() wait_for.remove(telnet_task) + _cf = getattr(telnet_writer, "_color_filter", None) + if _cf is not None: + _flush = _cf.flush() + if _flush: + stdout.write(_flush.encode()) stdout.write(f"\033[m{linesep}Connection closed.{linesep}".encode()) # Cleanup resize handler on local escape close if term._istty and remove_winch: # pylint: disable=protected-access @@ -273,6 +278,11 @@ def _on_winch() -> None: if stdin_task in wait_for: stdin_task.cancel() wait_for.remove(stdin_task) + _cf = getattr(telnet_writer, "_color_filter", None) + if _cf is not None: + _flush = _cf.flush() + if _flush: + stdout.write(_flush.encode()) stdout.write( f"\033[m{linesep}Connection closed by foreign host.{linesep}".encode() ) @@ -289,6 +299,9 @@ def _on_winch() -> None: except Exception: # pylint: disable=broad-exception-caught pass else: + _cf = getattr(telnet_writer, "_color_filter", None) + if _cf is not None: + out = _cf.filter(out) stdout.write(out.encode() or b":?!?:") telnet_task = accessories.make_reader_task(telnet_reader, size=2**24) wait_for.add(telnet_task) diff --git a/telnetlib3/color_filter.py b/telnetlib3/color_filter.py new file mode 100644 index 00000000..4120c3a7 --- /dev/null +++ b/telnetlib3/color_filter.py @@ -0,0 +1,431 @@ +""" +ANSI color palette translation for telnet client output. + +Most modern terminals use custom palette colors for ANSI colors 0-15 (e.g. +Solarized, Dracula, Gruvbox themes). 
When connecting to MUDs and BBS systems, +the artwork and text colors were designed for specific hardware palettes such as +IBM EGA, VGA, or Amiga. The terminal's custom palette distorts the intended +colors, often ruining ANSI artwork. + +By translating basic 16-color SGR codes into their exact 24-bit RGB equivalents +from named hardware palettes, we bypass the terminal's palette entirely and +display the colors the artist intended. + +This feature is enabled by default using the EGA palette. Use +``--colormatch=none`` on the ``telnetlib3-client`` command line to disable it. + +Example usage:: + + # Default EGA palette with brightness/contrast adjustment + telnetlib3-client mud.example.com 4000 + + # Use VGA palette instead + telnetlib3-client --colormatch=vga mud.example.com + + # Disable color translation entirely + telnetlib3-client --colormatch=none mud.example.com + + # Custom brightness and contrast + telnetlib3-client --color-brightness=0.7 --color-contrast=0.6 mud.example.com + + # White-background terminal (reverse video) + telnetlib3-client --reverse-video mud.example.com +""" + +from __future__ import annotations + +# std imports +import re +from typing import Dict, List, Match, Tuple, Optional, NamedTuple + +# 3rd party +from wcwidth.sgr_state import _SGR_PATTERN + +__all__ = ("ColorConfig", "ColorFilter", "PALETTES") + +# Type alias for a 16-color palette: 16 (R, G, B) tuples indexed 0-15. +# Index 0-7: normal colors (black, red, green, yellow, blue, magenta, cyan, white) +# Index 8-15: bright variants of the same order. +PaletteRGB = Tuple[ + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], +] + +# Hardware color palettes. 
Each defines exact RGB values for ANSI colors 0-15. +PALETTES: Dict[str, PaletteRGB] = { + # IBM Enhanced Graphics Adapter -- the classic DOS palette used by most + # BBS and MUD ANSI artwork. + "ega": ( + (0, 0, 0), + (170, 0, 0), + (0, 170, 0), + (170, 85, 0), + (0, 0, 170), + (170, 0, 170), + (0, 170, 170), + (170, 170, 170), + (85, 85, 85), + (255, 85, 85), + (85, 255, 85), + (255, 255, 85), + (85, 85, 255), + (255, 85, 255), + (85, 255, 255), + (255, 255, 255), + ), + # IBM Color Graphics Adapter -- earlier, more saturated palette. + "cga": ( + (0, 0, 0), + (170, 0, 0), + (0, 170, 0), + (170, 170, 0), + (0, 0, 170), + (170, 0, 170), + (0, 170, 170), + (170, 170, 170), + (85, 85, 85), + (255, 85, 85), + (85, 255, 85), + (255, 255, 85), + (85, 85, 255), + (255, 85, 255), + (85, 255, 255), + (255, 255, 255), + ), + # VGA / DOS standard palette -- the most common DOS palette, very close + # to EGA but with a brighter dark yellow. + "vga": ( + (0, 0, 0), + (170, 0, 0), + (0, 170, 0), + (170, 85, 0), + (0, 0, 170), + (170, 0, 170), + (0, 170, 170), + (170, 170, 170), + (85, 85, 85), + (255, 85, 85), + (85, 255, 85), + (255, 255, 85), + (85, 85, 255), + (255, 85, 255), + (85, 255, 255), + (255, 255, 255), + ), + # Amiga Workbench 1.x palette -- warmer tones characteristic of the + # Commodore Amiga. + "amiga": ( + (0, 0, 0), + (170, 0, 0), + (0, 170, 0), + (170, 170, 0), + (0, 0, 170), + (170, 0, 170), + (0, 170, 170), + (187, 187, 187), + (85, 85, 85), + (255, 85, 85), + (85, 255, 85), + (255, 255, 85), + (85, 85, 255), + (255, 85, 255), + (85, 255, 255), + (255, 255, 255), + ), + # xterm default palette -- the standard xterm color table. 
+ "xterm": ( + (0, 0, 0), + (205, 0, 0), + (0, 205, 0), + (205, 205, 0), + (0, 0, 238), + (205, 0, 205), + (0, 205, 205), + (229, 229, 229), + (127, 127, 127), + (255, 0, 0), + (0, 255, 0), + (255, 255, 0), + (92, 92, 255), + (255, 0, 255), + (0, 255, 255), + (255, 255, 255), + ), +} + +# Detect potentially incomplete escape sequence at end of a chunk. +_TRAILING_ESC = re.compile(r"\x1b(\[[\d;:]*)?$") + + +class ColorConfig(NamedTuple): + """ + Configuration for ANSI color palette translation. + + :param palette_name: Name of the hardware palette to use (key in PALETTES). + :param brightness: Brightness scale factor [0.0..1.0], where 1.0 is original. + :param contrast: Contrast scale factor [0.0..1.0], where 1.0 is original. + :param background_color: Forced background RGB as (R, G, B) tuple. + :param reverse_video: When True, swap fg/bg for light-background terminals. + """ + + palette_name: str = "ega" + brightness: float = 0.9 + contrast: float = 0.8 + background_color: Tuple[int, int, int] = (16, 16, 16) + reverse_video: bool = False + + +def _sgr_code_to_palette_index(code: int) -> Optional[int]: + """ + Map a basic SGR color code to a palette index (0-15). + + :param code: SGR parameter value (30-37, 40-47, 90-97, or 100-107). + :returns: Palette index 0-15, or None if not a basic color code. + """ + if 30 <= code <= 37: + return code - 30 + if 40 <= code <= 47: + return code - 40 + if 90 <= code <= 97: + return code - 90 + 8 + if 100 <= code <= 107: + return code - 100 + 8 + return None + + +def _is_foreground_code(code: int) -> bool: + """ + Return True if *code* is a foreground color SGR parameter. + + :param code: SGR parameter value. + :returns: True for foreground codes (30-37, 90-97). + """ + return (30 <= code <= 37) or (90 <= code <= 97) + + +def _adjust_color( + r: int, g: int, b: int, brightness: float, contrast: float +) -> Tuple[int, int, int]: + """ + Apply brightness and contrast scaling to an RGB color. 
+ + Brightness scales linearly toward black (0.0 = black, 1.0 = original). + Contrast scales linearly toward mid-gray (0.0 = flat gray, 1.0 = original). + Result is clamped to 0-255. + + :param r: Red channel (0-255). + :param g: Green channel (0-255). + :param b: Blue channel (0-255). + :param brightness: Brightness factor [0.0..1.0]. + :param contrast: Contrast factor [0.0..1.0]. + :returns: Adjusted (R, G, B) tuple. + """ + mid = 127.5 + r_f = mid + (r * brightness - mid) * contrast + g_f = mid + (g * brightness - mid) * contrast + b_f = mid + (b * brightness - mid) * contrast + return ( + max(0, min(255, int(r_f + 0.5))), + max(0, min(255, int(g_f + 0.5))), + max(0, min(255, int(b_f + 0.5))), + ) + + +class ColorFilter: + """ + Stateful ANSI color palette translation filter. + + Translates basic 16-color ANSI SGR codes to 24-bit RGB equivalents from a named hardware + palette, with brightness/contrast adjustment and background color enforcement. + + The filter is designed to process chunked text (as received from a telnet connection) and + correctly handles escape sequences split across chunk boundaries. + + :param config: Color configuration parameters. + """ + + def __init__(self, config: ColorConfig) -> None: + """Initialize with the given color configuration.""" + self._config = config + palette = PALETTES[config.palette_name] + self._adjusted: List[Tuple[int, int, int]] = [ + _adjust_color(r, g, b, config.brightness, config.contrast) for r, g, b in palette + ] + bg = config.background_color + if config.reverse_video: + bg = (255 - bg[0], 255 - bg[1], 255 - bg[2]) + self._bg_sgr = f"\x1b[48;2;{bg[0]};{bg[1]};{bg[2]}m" + self._buffer = "" + self._initial = True + self._bold = False + + def filter(self, text: str) -> str: + """ + Transform SGR sequences in *text* using the configured palette. + + Handles chunked input by buffering incomplete trailing escape sequences across calls. 
On + the very first non-empty output, the configured background color is injected. + + :param text: Input text, possibly containing ANSI escape sequences. + :returns: Text with basic colors replaced by 24-bit RGB equivalents. + """ + if self._buffer: + text = self._buffer + text + self._buffer = "" + + match = _TRAILING_ESC.search(text) + if match: + self._buffer = match.group() + text = text[: match.start()] + + if not text: + return "" + + result = _SGR_PATTERN.sub(self._replace_sgr, text) + + if self._initial: + self._initial = False + result = self._bg_sgr + result + return result + + # pylint: disable-next=too-complex,too-many-branches,too-many-statements + def _replace_sgr(self, match: Match[str]) -> str: # noqa: C901 + r""" + Regex replacement callback for a single SGR sequence. + + Tracks bold state across calls so that ``\x1b[1;30m`` (bold + black) uses the bright palette + entry (index 8) instead of pure black. This preserves the traditional "bold as bright" + rendering that legacy systems rely on, which would otherwise be lost when converting to + 24-bit RGB (terminals do not brighten true-color values for bold). + """ + params_str = match.group(1) + + # Empty params or bare "0" → reset + if not params_str: + self._bold = False + return f"\x1b[0m{self._bg_sgr}" + + # Colon-separated extended colors (ITU T.416) — pass through unchanged + if ":" in params_str: + return match.group() + + parts = params_str.split(";") + output_parts: List[str] = [] + i = 0 + has_reset = False + + # Pre-scan: check if bold (1) appears in this sequence so that a + # color code *before* the bold in the same sequence still gets the + # bright treatment, e.g. \x1b[31;1m should brighten red. 
+ seq_sets_bold = False + for part in parts: + try: + val = int(part) if part else 0 + except ValueError: + continue + if val == 1: + seq_sets_bold = True + break + + # Effective bold for color lookups in this sequence + bold = self._bold or seq_sets_bold + + while i < len(parts): + try: + p = int(parts[i]) if parts[i] else 0 + except ValueError: + output_parts.append(parts[i]) + i += 1 + continue + + if p == 0: + has_reset = True + bold = False + output_parts.append("0") + i += 1 + continue + + # Track bold state + if p == 1: + output_parts.append("1") + i += 1 + continue + if p == 22: + bold = False + output_parts.append("22") + i += 1 + continue + + # Extended color — pass through 38;5;N or 38;2;R;G;B verbatim + if p in (38, 48): + start_i = i + i += 1 + if i < len(parts): + try: + mode = int(parts[i]) if parts[i] else 0 + except ValueError: + mode = 0 + i += 1 + if mode == 5 and i < len(parts): + i += 1 + elif mode == 2 and i + 2 < len(parts): + i += 3 + output_parts.extend(parts[start_i:i]) + continue + + # Default fg/bg — pass through + if p in (39, 49): + output_parts.append(str(p)) + i += 1 + continue + + idx = _sgr_code_to_palette_index(p) + if idx is not None: + is_fg = _is_foreground_code(p) + # Bold-as-bright: promote normal fg 30-37 to bright 8-15 + if is_fg and bold and 30 <= p <= 37: + idx += 8 + r, g, b = self._adjusted[idx] + if self._config.reverse_video: + is_fg = not is_fg + if is_fg: + output_parts.extend(["38", "2", str(r), str(g), str(b)]) + else: + output_parts.extend(["48", "2", str(r), str(g), str(b)]) + else: + output_parts.append(str(p)) + i += 1 + + # Update persistent bold state for subsequent sequences + self._bold = bold + + result = f"\x1b[{';'.join(output_parts)}m" if output_parts else "" + if has_reset: + result += self._bg_sgr + return result + + def flush(self) -> str: + """ + Flush any buffered partial escape sequence. + + Call this when the stream closes to emit any remaining buffered bytes. 
+ + :returns: Buffered content (may be an incomplete escape sequence). + """ + result = self._buffer + self._buffer = "" + return result diff --git a/telnetlib3/encodings/__init__.py b/telnetlib3/encodings/__init__.py new file mode 100644 index 00000000..5a950447 --- /dev/null +++ b/telnetlib3/encodings/__init__.py @@ -0,0 +1,48 @@ +""" +Custom BBS/retro-computing codecs for telnetlib3. + +Registers petscii and atarist codecs with Python's codecs module on import. +These encodings are then available for use with ``bytes.decode()`` and the +``--encoding`` CLI flag of ``telnetlib3-fingerprint``. +""" + +# std imports +import codecs +import importlib + +_cache = {} +_aliases = {} + + +def _search_function(encoding): + """Codec search function registered with codecs.register().""" + normalized = encoding.lower().replace('-', '_') + + if normalized in _aliases: + return _aliases[normalized] + + if normalized in _cache: + return _cache[normalized] + + try: + mod = importlib.import_module(f'.{normalized}', package=__name__) + except ImportError: + _cache[normalized] = None + return None + + try: + info = mod.getregentry() + except AttributeError: + _cache[normalized] = None + return None + + _cache[normalized] = info + + if hasattr(mod, 'getaliases'): + for alias in mod.getaliases(): + _aliases[alias] = info + + return info + + +codecs.register(_search_function) diff --git a/telnetlib3/encodings/atarist.py b/telnetlib3/encodings/atarist.py new file mode 100644 index 00000000..c454d19c --- /dev/null +++ b/telnetlib3/encodings/atarist.py @@ -0,0 +1,328 @@ +""" +Atari ST codec. 
+ +Generated from ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT +""" +# pylint: disable=redefined-builtin + +# std imports +import codecs + + +class Codec(codecs.Codec): + """Atari ST character map codec.""" + + def encode(self, input, errors='strict'): + """Encode input string using Atari ST character map.""" + return codecs.charmap_encode(input, errors, ENCODING_TABLE) + + def decode(self, input, errors='strict'): + """Decode input bytes using Atari ST character map.""" + return codecs.charmap_decode(input, errors, DECODING_TABLE) + + +class IncrementalEncoder(codecs.IncrementalEncoder): + """Atari ST incremental encoder.""" + + def encode(self, input, final=False): + """Encode input string incrementally.""" + return codecs.charmap_encode(input, self.errors, ENCODING_TABLE)[0] + + +class IncrementalDecoder(codecs.IncrementalDecoder): + """Atari ST incremental decoder.""" + + def decode(self, input, final=False): + """Decode input bytes incrementally.""" + return codecs.charmap_decode(input, self.errors, DECODING_TABLE)[0] + + +class StreamWriter(Codec, codecs.StreamWriter): + """Atari ST stream writer.""" + + +class StreamReader(Codec, codecs.StreamReader): + """Atari ST stream reader.""" + + +def getaliases(): + """Return codec aliases.""" + return ('atari',) + + +def getregentry(): + """Return the codec registry entry.""" + return codecs.CodecInfo( + name='atarist', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +# Decoding Table + +DECODING_TABLE = ( + '\x00' # 0x00 -> NULL + '\x01' # 0x01 -> START OF HEADING + '\x02' # 0x02 -> START OF TEXT + '\x03' # 0x03 -> END OF TEXT + '\x04' # 0x04 -> END OF TRANSMISSION + '\x05' # 0x05 -> ENQUIRY + '\x06' # 0x06 -> ACKNOWLEDGE + '\x07' # 0x07 -> BELL + '\x08' # 0x08 -> BACKSPACE + '\t' # 0x09 -> HORIZONTAL TABULATION + '\n' # 0x0A -> LINE FEED + '\x0b' # 
0x0B -> VERTICAL TABULATION + '\x0c' # 0x0C -> FORM FEED + '\r' # 0x0D -> CARRIAGE RETURN + '\x0e' # 0x0E -> SHIFT OUT + '\x0f' # 0x0F -> SHIFT IN + '\x10' # 0x10 -> DATA LINK ESCAPE + '\x11' # 0x11 -> DEVICE CONTROL ONE + '\x12' # 0x12 -> DEVICE CONTROL TWO + '\x13' # 0x13 -> DEVICE CONTROL THREE + '\x14' # 0x14 -> DEVICE CONTROL FOUR + '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + '\x16' # 0x16 -> SYNCHRONOUS IDLE + '\x17' # 0x17 -> END OF TRANSMISSION BLOCK + '\x18' # 0x18 -> CANCEL + '\x19' # 0x19 -> END OF MEDIUM + '\x1a' # 0x1A -> SUBSTITUTE + '\x1b' # 0x1B -> ESCAPE + '\x1c' # 0x1C -> FILE SEPARATOR + '\x1d' # 0x1D -> GROUP SEPARATOR + '\x1e' # 0x1E -> RECORD SEPARATOR + '\x1f' # 0x1F -> UNIT SEPARATOR + ' ' # 0x20 -> SPACE + '!' # 0x21 -> EXCLAMATION MARK + '"' # 0x22 -> QUOTATION MARK + '#' # 0x23 -> NUMBER SIGN + '$' # 0x24 -> DOLLAR SIGN + '%' # 0x25 -> PERCENT SIGN + '&' # 0x26 -> AMPERSAND + "'" # 0x27 -> APOSTROPHE + '(' # 0x28 -> LEFT PARENTHESIS + ')' # 0x29 -> RIGHT PARENTHESIS + '*' # 0x2A -> ASTERISK + '+' # 0x2B -> PLUS SIGN + ',' # 0x2C -> COMMA + '-' # 0x2D -> HYPHEN-MINUS + '.' # 0x2E -> FULL STOP + '/' # 0x2F -> SOLIDUS + '0' # 0x30 -> DIGIT ZERO + '1' # 0x31 -> DIGIT ONE + '2' # 0x32 -> DIGIT TWO + '3' # 0x33 -> DIGIT THREE + '4' # 0x34 -> DIGIT FOUR + '5' # 0x35 -> DIGIT FIVE + '6' # 0x36 -> DIGIT SIX + '7' # 0x37 -> DIGIT SEVEN + '8' # 0x38 -> DIGIT EIGHT + '9' # 0x39 -> DIGIT NINE + ':' # 0x3A -> COLON + ';' # 0x3B -> SEMICOLON + '<' # 0x3C -> LESS-THAN SIGN + '=' # 0x3D -> EQUALS SIGN + '>' # 0x3E -> GREATER-THAN SIGN + '?' 
# 0x3F -> QUESTION MARK + '@' # 0x40 -> COMMERCIAL AT + 'A' # 0x41 -> LATIN CAPITAL LETTER A + 'B' # 0x42 -> LATIN CAPITAL LETTER B + 'C' # 0x43 -> LATIN CAPITAL LETTER C + 'D' # 0x44 -> LATIN CAPITAL LETTER D + 'E' # 0x45 -> LATIN CAPITAL LETTER E + 'F' # 0x46 -> LATIN CAPITAL LETTER F + 'G' # 0x47 -> LATIN CAPITAL LETTER G + 'H' # 0x48 -> LATIN CAPITAL LETTER H + 'I' # 0x49 -> LATIN CAPITAL LETTER I + 'J' # 0x4A -> LATIN CAPITAL LETTER J + 'K' # 0x4B -> LATIN CAPITAL LETTER K + 'L' # 0x4C -> LATIN CAPITAL LETTER L + 'M' # 0x4D -> LATIN CAPITAL LETTER M + 'N' # 0x4E -> LATIN CAPITAL LETTER N + 'O' # 0x4F -> LATIN CAPITAL LETTER O + 'P' # 0x50 -> LATIN CAPITAL LETTER P + 'Q' # 0x51 -> LATIN CAPITAL LETTER Q + 'R' # 0x52 -> LATIN CAPITAL LETTER R + 'S' # 0x53 -> LATIN CAPITAL LETTER S + 'T' # 0x54 -> LATIN CAPITAL LETTER T + 'U' # 0x55 -> LATIN CAPITAL LETTER U + 'V' # 0x56 -> LATIN CAPITAL LETTER V + 'W' # 0x57 -> LATIN CAPITAL LETTER W + 'X' # 0x58 -> LATIN CAPITAL LETTER X + 'Y' # 0x59 -> LATIN CAPITAL LETTER Y + 'Z' # 0x5A -> LATIN CAPITAL LETTER Z + '[' # 0x5B -> LEFT SQUARE BRACKET + '\\' # 0x5C -> REVERSE SOLIDUS + ']' # 0x5D -> RIGHT SQUARE BRACKET + '^' # 0x5E -> CIRCUMFLEX ACCENT + '_' # 0x5F -> LOW LINE + '`' # 0x60 -> GRAVE ACCENT + 'a' # 0x61 -> LATIN SMALL LETTER A + 'b' # 0x62 -> LATIN SMALL LETTER B + 'c' # 0x63 -> LATIN SMALL LETTER C + 'd' # 0x64 -> LATIN SMALL LETTER D + 'e' # 0x65 -> LATIN SMALL LETTER E + 'f' # 0x66 -> LATIN SMALL LETTER F + 'g' # 0x67 -> LATIN SMALL LETTER G + 'h' # 0x68 -> LATIN SMALL LETTER H + 'i' # 0x69 -> LATIN SMALL LETTER I + 'j' # 0x6A -> LATIN SMALL LETTER J + 'k' # 0x6B -> LATIN SMALL LETTER K + 'l' # 0x6C -> LATIN SMALL LETTER L + 'm' # 0x6D -> LATIN SMALL LETTER M + 'n' # 0x6E -> LATIN SMALL LETTER N + 'o' # 0x6F -> LATIN SMALL LETTER O + 'p' # 0x70 -> LATIN SMALL LETTER P + 'q' # 0x71 -> LATIN SMALL LETTER Q + 'r' # 0x72 -> LATIN SMALL LETTER R + 's' # 0x73 -> LATIN SMALL LETTER S + 't' # 0x74 -> LATIN SMALL LETTER 
T + 'u' # 0x75 -> LATIN SMALL LETTER U + 'v' # 0x76 -> LATIN SMALL LETTER V + 'w' # 0x77 -> LATIN SMALL LETTER W + 'x' # 0x78 -> LATIN SMALL LETTER X + 'y' # 0x79 -> LATIN SMALL LETTER Y + 'z' # 0x7A -> LATIN SMALL LETTER Z + '{' # 0x7B -> LEFT CURLY BRACKET + '|' # 0x7C -> VERTICAL LINE + '}' # 0x7D -> RIGHT CURLY BRACKET + '~' # 0x7E -> TILDE + '\x7f' # 0x7F -> DELETE + '\xc7' # 0x80 -> LATIN CAPITAL LETTER C WITH CEDILLA + '\xfc' # 0x81 -> LATIN SMALL LETTER U WITH DIAERESIS + '\xe9' # 0x82 -> LATIN SMALL LETTER E WITH ACUTE + '\xe2' # 0x83 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + '\xe4' # 0x84 -> LATIN SMALL LETTER A WITH DIAERESIS + '\xe0' # 0x85 -> LATIN SMALL LETTER A WITH GRAVE + '\xe5' # 0x86 -> LATIN SMALL LETTER A WITH RING ABOVE + '\xe7' # 0x87 -> LATIN SMALL LETTER C WITH CEDILLA + '\xea' # 0x88 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + '\xeb' # 0x89 -> LATIN SMALL LETTER E WITH DIAERESIS + '\xe8' # 0x8A -> LATIN SMALL LETTER E WITH GRAVE + '\xef' # 0x8B -> LATIN SMALL LETTER I WITH DIAERESIS + '\xee' # 0x8C -> LATIN SMALL LETTER I WITH CIRCUMFLEX + '\xec' # 0x8D -> LATIN SMALL LETTER I WITH GRAVE + '\xc4' # 0x8E -> LATIN CAPITAL LETTER A WITH DIAERESIS + '\xc5' # 0x8F -> LATIN CAPITAL LETTER A WITH RING ABOVE + '\xc9' # 0x90 -> LATIN CAPITAL LETTER E WITH ACUTE + '\xe6' # 0x91 -> LATIN SMALL LETTER AE + '\xc6' # 0x92 -> LATIN CAPITAL LETTER AE + '\xf4' # 0x93 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + '\xf6' # 0x94 -> LATIN SMALL LETTER O WITH DIAERESIS + '\xf2' # 0x95 -> LATIN SMALL LETTER O WITH GRAVE + '\xfb' # 0x96 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + '\xf9' # 0x97 -> LATIN SMALL LETTER U WITH GRAVE + '\xff' # 0x98 -> LATIN SMALL LETTER Y WITH DIAERESIS + '\xd6' # 0x99 -> LATIN CAPITAL LETTER O WITH DIAERESIS + '\xdc' # 0x9A -> LATIN CAPITAL LETTER U WITH DIAERESIS + '\xa2' # 0x9B -> CENT SIGN + '\xa3' # 0x9C -> POUND SIGN + '\xa5' # 0x9D -> YEN SIGN + '\xdf' # 0x9E -> LATIN SMALL LETTER SHARP S + '\u0192' # 0x9F -> LATIN SMALL LETTER F WITH 
HOOK + '\xe1' # 0xA0 -> LATIN SMALL LETTER A WITH ACUTE + '\xed' # 0xA1 -> LATIN SMALL LETTER I WITH ACUTE + '\xf3' # 0xA2 -> LATIN SMALL LETTER O WITH ACUTE + '\xfa' # 0xA3 -> LATIN SMALL LETTER U WITH ACUTE + '\xf1' # 0xA4 -> LATIN SMALL LETTER N WITH TILDE + '\xd1' # 0xA5 -> LATIN CAPITAL LETTER N WITH TILDE + '\xaa' # 0xA6 -> FEMININE ORDINAL INDICATOR + '\xba' # 0xA7 -> MASCULINE ORDINAL INDICATOR + '\xbf' # 0xA8 -> INVERTED QUESTION MARK + '\u2310' # 0xA9 -> REVERSED NOT SIGN + '\xac' # 0xAA -> NOT SIGN + '\xbd' # 0xAB -> VULGAR FRACTION ONE HALF + '\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER + '\xa1' # 0xAD -> INVERTED EXCLAMATION MARK + '\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + '\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + '\xe3' # 0xB0 -> LATIN SMALL LETTER A WITH TILDE + '\xf5' # 0xB1 -> LATIN SMALL LETTER O WITH TILDE + '\xd8' # 0xB2 -> LATIN CAPITAL LETTER O WITH STROKE + '\xf8' # 0xB3 -> LATIN SMALL LETTER O WITH STROKE + '\u0153' # 0xB4 -> LATIN SMALL LIGATURE OE + '\u0152' # 0xB5 -> LATIN CAPITAL LIGATURE OE + '\xc0' # 0xB6 -> LATIN CAPITAL LETTER A WITH GRAVE + '\xc3' # 0xB7 -> LATIN CAPITAL LETTER A WITH TILDE + '\xd5' # 0xB8 -> LATIN CAPITAL LETTER O WITH TILDE + '\xa8' # 0xB9 -> DIAERESIS + '\xb4' # 0xBA -> ACUTE ACCENT + '\u2020' # 0xBB -> DAGGER + '\xb6' # 0xBC -> PILCROW SIGN + '\xa9' # 0xBD -> COPYRIGHT SIGN + '\xae' # 0xBE -> REGISTERED SIGN + '\u2122' # 0xBF -> TRADE MARK SIGN + '\u0133' # 0xC0 -> LATIN SMALL LIGATURE IJ + '\u0132' # 0xC1 -> LATIN CAPITAL LIGATURE IJ + '\u05d0' # 0xC2 -> HEBREW LETTER ALEF + '\u05d1' # 0xC3 -> HEBREW LETTER BET + '\u05d2' # 0xC4 -> HEBREW LETTER GIMEL + '\u05d3' # 0xC5 -> HEBREW LETTER DALET + '\u05d4' # 0xC6 -> HEBREW LETTER HE + '\u05d5' # 0xC7 -> HEBREW LETTER VAV + '\u05d6' # 0xC8 -> HEBREW LETTER ZAYIN + '\u05d7' # 0xC9 -> HEBREW LETTER HET + '\u05d8' # 0xCA -> HEBREW LETTER TET + '\u05d9' # 0xCB -> HEBREW LETTER YOD + '\u05db' # 0xCC -> HEBREW LETTER KAF + '\u05dc' # 
0xCD -> HEBREW LETTER LAMED + '\u05de' # 0xCE -> HEBREW LETTER MEM + '\u05e0' # 0xCF -> HEBREW LETTER NUN + '\u05e1' # 0xD0 -> HEBREW LETTER SAMEKH + '\u05e2' # 0xD1 -> HEBREW LETTER AYIN + '\u05e4' # 0xD2 -> HEBREW LETTER PE + '\u05e6' # 0xD3 -> HEBREW LETTER TSADI + '\u05e7' # 0xD4 -> HEBREW LETTER QOF + '\u05e8' # 0xD5 -> HEBREW LETTER RESH + '\u05e9' # 0xD6 -> HEBREW LETTER SHIN + '\u05ea' # 0xD7 -> HEBREW LETTER TAV + '\u05df' # 0xD8 -> HEBREW LETTER FINAL NUN + '\u05da' # 0xD9 -> HEBREW LETTER FINAL KAF + '\u05dd' # 0xDA -> HEBREW LETTER FINAL MEM + '\u05e3' # 0xDB -> HEBREW LETTER FINAL PE + '\u05e5' # 0xDC -> HEBREW LETTER FINAL TSADI + '\xa7' # 0xDD -> SECTION SIGN + '\u2227' # 0xDE -> LOGICAL AND + '\u221e' # 0xDF -> INFINITY + '\u03b1' # 0xE0 -> GREEK SMALL LETTER ALPHA + '\u03b2' # 0xE1 -> GREEK SMALL LETTER BETA + '\u0393' # 0xE2 -> GREEK CAPITAL LETTER GAMMA + '\u03c0' # 0xE3 -> GREEK SMALL LETTER PI + '\u03a3' # 0xE4 -> GREEK CAPITAL LETTER SIGMA + '\u03c3' # 0xE5 -> GREEK SMALL LETTER SIGMA + '\xb5' # 0xE6 -> MICRO SIGN + '\u03c4' # 0xE7 -> GREEK SMALL LETTER TAU + '\u03a6' # 0xE8 -> GREEK CAPITAL LETTER PHI + '\u0398' # 0xE9 -> GREEK CAPITAL LETTER THETA + '\u03a9' # 0xEA -> GREEK CAPITAL LETTER OMEGA + '\u03b4' # 0xEB -> GREEK SMALL LETTER DELTA + '\u222e' # 0xEC -> CONTOUR INTEGRAL + '\u03c6' # 0xED -> GREEK SMALL LETTER PHI + '\u2208' # 0xEE -> ELEMENT OF SIGN + '\u2229' # 0xEF -> INTERSECTION + '\u2261' # 0xF0 -> IDENTICAL TO + '\xb1' # 0xF1 -> PLUS-MINUS SIGN + '\u2265' # 0xF2 -> GREATER-THAN OR EQUAL TO + '\u2264' # 0xF3 -> LESS-THAN OR EQUAL TO + '\u2320' # 0xF4 -> TOP HALF INTEGRAL + '\u2321' # 0xF5 -> BOTTOM HALF INTEGRAL + '\xf7' # 0xF6 -> DIVISION SIGN + '\u2248' # 0xF7 -> ALMOST EQUAL TO + '\xb0' # 0xF8 -> DEGREE SIGN + '\u2219' # 0xF9 -> BULLET OPERATOR + '\xb7' # 0xFA -> MIDDLE DOT + '\u221a' # 0xFB -> SQUARE ROOT + '\u207f' # 0xFC -> SUPERSCRIPT LATIN SMALL LETTER N + '\xb2' # 0xFD -> SUPERSCRIPT TWO + '\xb3' # 0xFE -> SUPERSCRIPT 
THREE + '\xaf' # 0xFF -> MACRON +) + +# Encoding table +ENCODING_TABLE = codecs.charmap_build(DECODING_TABLE) diff --git a/telnetlib3/encodings/petscii.py b/telnetlib3/encodings/petscii.py new file mode 100644 index 00000000..68f11edf --- /dev/null +++ b/telnetlib3/encodings/petscii.py @@ -0,0 +1,359 @@ +""" +PETSCII (Commodore 64/128) encoding -- shifted (lowercase) mode. + +PETSCII is the character encoding used by Commodore computers (C64, C128, +VIC-20, Plus/4, etc.). This codec implements the "shifted" character set +(mixed case with lowercase at 0x41-0x5A and uppercase at 0xC1-0xDA), +which is the standard mode for BBS operation. + +Mapping sources: +- Commodore 64 Programmer's Reference Guide +- https://sta.c64.org/cbm64pet.html +- Unicode Consortium Legacy Computing Supplement (U+1FB00-U+1FBFF) + +Control codes (0x00-0x1F, 0x80-0x9F) decode to the control character with the +same code point, except 0x8D (SHIFT-RETURN), which decodes to carriage return. +Graphics characters use the closest available Unicode approximations from +the Box Drawing, Block Elements, and Geometric Shapes blocks. +""" + +# std imports +import codecs + +# Decoding Table -- PETSCII shifted (lowercase) mode, 256 entries. 
+# +# 0x00-0x1F : C64 control codes +# 0x20-0x3F : ASCII compatible (digits, punctuation) +# 0x40-0x5F : @, lowercase a-z, [, pound, ], up-arrow, left-arrow +# 0x60-0x7F : graphics characters +# 0x80-0x9F : C64 control codes (colors, function keys) +# 0xA0-0xBF : graphics characters (shifted) +# 0xC0-0xDF : graphics (0xC0) + uppercase A-Z (0xC1-0xDA) + graphics +# 0xE0-0xFE : graphics characters (same as 0xA0-0xBE) +# 0xFF : pi + +DECODING_TABLE = ( + # 0x00-0x1F: Control codes + '\x00' # 0x00 NUL + '\x01' # 0x01 (unused) + '\x02' # 0x02 (unused) + '\x03' # 0x03 RUN/STOP + '\x04' # 0x04 (unused) + '\x05' # 0x05 WHT (white) + '\x06' # 0x06 (unused) + '\x07' # 0x07 BEL + '\x08' # 0x08 shift-disable + '\x09' # 0x09 shift-enable + '\n' # 0x0A LF + '\x0b' # 0x0B (unused) + '\x0c' # 0x0C (unused) + '\r' # 0x0D RETURN + '\x0e' # 0x0E lowercase charset + '\x0f' # 0x0F (unused) + '\x10' # 0x10 (unused) + '\x11' # 0x11 cursor down + '\x12' # 0x12 RVS ON + '\x13' # 0x13 HOME + '\x14' # 0x14 DEL + '\x15' # 0x15 (unused) + '\x16' # 0x16 (unused) + '\x17' # 0x17 (unused) + '\x18' # 0x18 (unused) + '\x19' # 0x19 (unused) + '\x1a' # 0x1A (unused) + '\x1b' # 0x1B ESC + '\x1c' # 0x1C RED + '\x1d' # 0x1D cursor right + '\x1e' # 0x1E GRN + '\x1f' # 0x1F BLU + # 0x20-0x3F: ASCII compatible + ' ' # 0x20 SPACE + '!' # 0x21 + '"' # 0x22 + '#' # 0x23 + '$' # 0x24 + '%' # 0x25 + '&' # 0x26 + "'" # 0x27 + '(' # 0x28 + ')' # 0x29 + '*' # 0x2A + '+' # 0x2B + ',' # 0x2C + '-' # 0x2D + '.' # 0x2E + '/' # 0x2F + '0' # 0x30 + '1' # 0x31 + '2' # 0x32 + '3' # 0x33 + '4' # 0x34 + '5' # 0x35 + '6' # 0x36 + '7' # 0x37 + '8' # 0x38 + '9' # 0x39 + ':' # 0x3A + ';' # 0x3B + '<' # 0x3C + '=' # 0x3D + '>' # 0x3E + '?' 
# 0x3F + # 0x40-0x5F: Letters and symbols + '@' # 0x40 + 'a' # 0x41 lowercase (PETSCII shifted mode) + 'b' # 0x42 + 'c' # 0x43 + 'd' # 0x44 + 'e' # 0x45 + 'f' # 0x46 + 'g' # 0x47 + 'h' # 0x48 + 'i' # 0x49 + 'j' # 0x4A + 'k' # 0x4B + 'l' # 0x4C + 'm' # 0x4D + 'n' # 0x4E + 'o' # 0x4F + 'p' # 0x50 + 'q' # 0x51 + 'r' # 0x52 + 's' # 0x53 + 't' # 0x54 + 'u' # 0x55 + 'v' # 0x56 + 'w' # 0x57 + 'x' # 0x58 + 'y' # 0x59 + 'z' # 0x5A + '[' # 0x5B + '\u00a3' # 0x5C POUND SIGN + ']' # 0x5D + '\u2191' # 0x5E UP ARROW + '\u2190' # 0x5F LEFT ARROW + # 0x60-0x7F: Graphics characters (shifted mode) + '\u2500' # 0x60 HORIZONTAL LINE + '\u2660' # 0x61 BLACK SPADE SUIT + '\u2502' # 0x62 VERTICAL LINE + '\u2500' # 0x63 HORIZONTAL LINE + '\u2597' # 0x64 QUADRANT LOWER RIGHT + '\u2596' # 0x65 QUADRANT LOWER LEFT + '\u2598' # 0x66 QUADRANT UPPER LEFT + '\u259d' # 0x67 QUADRANT UPPER RIGHT + '\u2599' # 0x68 QUADRANT UPPER LEFT AND LOWER LEFT AND LOWER RIGHT + '\u259f' # 0x69 QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT + '\u259e' # 0x6A QUADRANT UPPER RIGHT AND LOWER LEFT + '\u2595' # 0x6B RIGHT ONE EIGHTH BLOCK + '\u258f' # 0x6C LEFT ONE EIGHTH BLOCK + '\u2584' # 0x6D LOWER HALF BLOCK + '\u2580' # 0x6E UPPER HALF BLOCK + '\u2588' # 0x6F FULL BLOCK + '\u2584' # 0x70 LOWER HALF BLOCK (variant) + '\u259b' # 0x71 QUADRANT UPPER LEFT AND UPPER RIGHT AND LOWER LEFT + '\u2583' # 0x72 LOWER THREE EIGHTHS BLOCK + '\u2665' # 0x73 BLACK HEART SUIT + '\u259c' # 0x74 QUADRANT UPPER LEFT AND UPPER RIGHT AND LOWER RIGHT + '\u256d' # 0x75 BOX DRAWINGS LIGHT ARC DOWN AND RIGHT + '\u2573' # 0x76 BOX DRAWINGS LIGHT DIAGONAL CROSS + '\u25cb' # 0x77 WHITE CIRCLE + '\u2663' # 0x78 BLACK CLUB SUIT + '\u259a' # 0x79 QUADRANT UPPER LEFT AND LOWER RIGHT + '\u2666' # 0x7A BLACK DIAMOND SUIT + '\u253c' # 0x7B BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + '\u2502' # 0x7C VERTICAL LINE (with serif, approx) + '\u2571' # 0x7D BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT + '\u03c0' # 0x7E GREEK SMALL LETTER 
PI + '\u25e5' # 0x7F BLACK UPPER RIGHT TRIANGLE + # 0x80-0x9F: Control codes (colors, function keys, cursor) + '\x80' # 0x80 (unused) + '\x81' # 0x81 ORN (orange) + '\x82' # 0x82 (unused) + '\x83' # 0x83 (unused) + '\x84' # 0x84 (unused) + '\x85' # 0x85 F1 + '\x86' # 0x86 F3 + '\x87' # 0x87 F5 + '\x88' # 0x88 F7 + '\x89' # 0x89 F2 + '\x8a' # 0x8A F4 + '\x8b' # 0x8B F6 + '\x8c' # 0x8C F8 + '\r' # 0x8D SHIFT-RETURN + '\x8e' # 0x8E uppercase charset + '\x8f' # 0x8F (unused) + '\x90' # 0x90 BLK (black) + '\x91' # 0x91 cursor up + '\x92' # 0x92 RVS OFF + '\x93' # 0x93 CLR (clear screen) + '\x94' # 0x94 INS (insert) + '\x95' # 0x95 BRN (brown) + '\x96' # 0x96 LRD (light red) + '\x97' # 0x97 GR1 (dark grey) + '\x98' # 0x98 GR2 (medium grey) + '\x99' # 0x99 LGR (light green) + '\x9a' # 0x9A LBL (light blue) + '\x9b' # 0x9B GR3 (light grey) + '\x9c' # 0x9C PUR (purple) + '\x9d' # 0x9D cursor left + '\x9e' # 0x9E YEL (yellow) + '\x9f' # 0x9F CYN (cyan) + # 0xA0-0xBF: Shifted graphics + '\xa0' # 0xA0 SHIFTED SPACE (non-breaking) + '\u2584' # 0xA1 LOWER HALF BLOCK + '\u2580' # 0xA2 UPPER HALF BLOCK + '\u2500' # 0xA3 HORIZONTAL LINE + '\u2500' # 0xA4 HORIZONTAL LINE (lower) + '\u2500' # 0xA5 HORIZONTAL LINE (upper) + '\u2502' # 0xA6 VERTICAL LINE (right shifted) + '\u2502' # 0xA7 VERTICAL LINE (left shifted) + '\u2502' # 0xA8 VERTICAL LINE + '\u256e' # 0xA9 BOX DRAWINGS LIGHT ARC DOWN AND LEFT + '\u2570' # 0xAA BOX DRAWINGS LIGHT ARC UP AND RIGHT + '\u256f' # 0xAB BOX DRAWINGS LIGHT ARC UP AND LEFT + '\u2572' # 0xAC BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT + '\u2571' # 0xAD BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT + '\u2573' # 0xAE BOX DRAWINGS LIGHT DIAGONAL CROSS (small) + '\u2022' # 0xAF BULLET + '\u25e4' # 0xB0 BLACK UPPER LEFT TRIANGLE + '\u258c' # 0xB1 LEFT HALF BLOCK + '\u2597' # 0xB2 QUADRANT LOWER RIGHT + '\u2514' # 0xB3 BOX DRAWINGS LIGHT UP AND RIGHT + '\u2510' # 0xB4 BOX DRAWINGS LIGHT DOWN AND LEFT + '\u2582' # 0xB5 LOWER ONE QUARTER BLOCK 
+ '\u250c' # 0xB6 BOX DRAWINGS LIGHT DOWN AND RIGHT + '\u2534' # 0xB7 BOX DRAWINGS LIGHT UP AND HORIZONTAL + '\u252c' # 0xB8 BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + '\u2524' # 0xB9 BOX DRAWINGS LIGHT VERTICAL AND LEFT + '\u251c' # 0xBA BOX DRAWINGS LIGHT VERTICAL AND RIGHT + '\u2586' # 0xBB LOWER THREE QUARTERS BLOCK + '\u2585' # 0xBC LOWER FIVE EIGHTHS BLOCK + '\u2590' # 0xBD RIGHT HALF BLOCK + '\u2588' # 0xBE FULL BLOCK (variant) + '\u2572' # 0xBF DIAGONAL (variant) + # 0xC0-0xDF: Horizontal line + uppercase A-Z + graphics + '\u2500' # 0xC0 HORIZONTAL LINE (same as 0x60) + 'A' # 0xC1 LATIN CAPITAL LETTER A + 'B' # 0xC2 LATIN CAPITAL LETTER B + 'C' # 0xC3 LATIN CAPITAL LETTER C + 'D' # 0xC4 LATIN CAPITAL LETTER D + 'E' # 0xC5 LATIN CAPITAL LETTER E + 'F' # 0xC6 LATIN CAPITAL LETTER F + 'G' # 0xC7 LATIN CAPITAL LETTER G + 'H' # 0xC8 LATIN CAPITAL LETTER H + 'I' # 0xC9 LATIN CAPITAL LETTER I + 'J' # 0xCA LATIN CAPITAL LETTER J + 'K' # 0xCB LATIN CAPITAL LETTER K + 'L' # 0xCC LATIN CAPITAL LETTER L + 'M' # 0xCD LATIN CAPITAL LETTER M + 'N' # 0xCE LATIN CAPITAL LETTER N + 'O' # 0xCF LATIN CAPITAL LETTER O + 'P' # 0xD0 LATIN CAPITAL LETTER P + 'Q' # 0xD1 LATIN CAPITAL LETTER Q + 'R' # 0xD2 LATIN CAPITAL LETTER R + 'S' # 0xD3 LATIN CAPITAL LETTER S + 'T' # 0xD4 LATIN CAPITAL LETTER T + 'U' # 0xD5 LATIN CAPITAL LETTER U + 'V' # 0xD6 LATIN CAPITAL LETTER V + 'W' # 0xD7 LATIN CAPITAL LETTER W + 'X' # 0xD8 LATIN CAPITAL LETTER X + 'Y' # 0xD9 LATIN CAPITAL LETTER Y + 'Z' # 0xDA LATIN CAPITAL LETTER Z + '\u253c' # 0xDB BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL + '\u2502' # 0xDC VERTICAL LINE (with tick) + '\u2571' # 0xDD DIAGONAL + '\u03c0' # 0xDE GREEK SMALL LETTER PI + '\u25e5' # 0xDF BLACK UPPER RIGHT TRIANGLE + # 0xE0-0xFE: Graphics (same as 0xA0-0xBE) + '\xa0' # 0xE0 SHIFTED SPACE + '\u2584' # 0xE1 LOWER HALF BLOCK + '\u2580' # 0xE2 UPPER HALF BLOCK + '\u2500' # 0xE3 HORIZONTAL LINE + '\u2500' # 0xE4 HORIZONTAL LINE (lower) + '\u2500' # 0xE5 HORIZONTAL LINE (upper) + 
'\u2502' # 0xE6 VERTICAL LINE (right shifted) + '\u2502' # 0xE7 VERTICAL LINE (left shifted) + '\u2502' # 0xE8 VERTICAL LINE + '\u256e' # 0xE9 BOX DRAWINGS LIGHT ARC DOWN AND LEFT + '\u2570' # 0xEA BOX DRAWINGS LIGHT ARC UP AND RIGHT + '\u256f' # 0xEB BOX DRAWINGS LIGHT ARC UP AND LEFT + '\u2572' # 0xEC DIAGONAL + '\u2571' # 0xED DIAGONAL + '\u2573' # 0xEE BOX DRAWINGS LIGHT DIAGONAL CROSS + '\u2022' # 0xEF BULLET + '\u25e4' # 0xF0 BLACK UPPER LEFT TRIANGLE + '\u258c' # 0xF1 LEFT HALF BLOCK + '\u2597' # 0xF2 QUADRANT LOWER RIGHT + '\u2514' # 0xF3 BOX DRAWINGS LIGHT UP AND RIGHT + '\u2510' # 0xF4 BOX DRAWINGS LIGHT DOWN AND LEFT + '\u2582' # 0xF5 LOWER ONE QUARTER BLOCK + '\u250c' # 0xF6 BOX DRAWINGS LIGHT DOWN AND RIGHT + '\u2534' # 0xF7 BOX DRAWINGS LIGHT UP AND HORIZONTAL + '\u252c' # 0xF8 BOX DRAWINGS LIGHT DOWN AND HORIZONTAL + '\u2524' # 0xF9 BOX DRAWINGS LIGHT VERTICAL AND LEFT + '\u251c' # 0xFA BOX DRAWINGS LIGHT VERTICAL AND RIGHT + '\u2586' # 0xFB LOWER THREE QUARTERS BLOCK + '\u2585' # 0xFC LOWER FIVE EIGHTHS BLOCK + '\u2590' # 0xFD RIGHT HALF BLOCK + '\u2588' # 0xFE FULL BLOCK + '\u03c0' # 0xFF PI +) + +assert len(DECODING_TABLE) == 256 + + +class Codec(codecs.Codec): + """PETSCII character map codec.""" + + def encode(self, input, errors='strict'): # pylint: disable=redefined-builtin + """Encode input string using PETSCII character map.""" + return codecs.charmap_encode(input, errors, ENCODING_TABLE) + + def decode(self, input, errors='strict'): # pylint: disable=redefined-builtin + """Decode input bytes using PETSCII character map.""" + return codecs.charmap_decode(input, errors, DECODING_TABLE) + + +class IncrementalEncoder(codecs.IncrementalEncoder): + """PETSCII incremental encoder.""" + + def encode(self, input, final=False): # pylint: disable=redefined-builtin + """Encode input string incrementally.""" + return codecs.charmap_encode(input, self.errors, ENCODING_TABLE)[0] + + +class IncrementalDecoder(codecs.IncrementalDecoder): + """PETSCII 
incremental decoder.""" + + def decode(self, input, final=False): # pylint: disable=redefined-builtin + """Decode input bytes incrementally.""" + return codecs.charmap_decode(input, self.errors, DECODING_TABLE)[0] + + +class StreamWriter(Codec, codecs.StreamWriter): + """PETSCII stream writer.""" + + +class StreamReader(Codec, codecs.StreamReader): + """PETSCII stream reader.""" + + +def getregentry(): + """Return the codec registry entry.""" + return codecs.CodecInfo( + name='petscii', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +def getaliases(): + """Return codec aliases.""" + return ('cbm', 'commodore', 'c64', 'c128') + + +ENCODING_TABLE = {ord(c): DECODING_TABLE.index(c) for c in DECODING_TABLE} # duplicates: lowest byte wins diff --git a/telnetlib3/fingerprinting.py b/telnetlib3/fingerprinting.py index b2069f9d..a2268819 100644 --- a/telnetlib3/fingerprinting.py +++ b/telnetlib3/fingerprinting.py @@ -28,10 +28,14 @@ DO, DET, EOR, + MSP, + MXP, RCP, RSP, SGA, TLS, + ZMP, + ATCP, DONT, ECHO, GMCP, @@ -63,6 +67,7 @@ CHARSET, ENCRYPT, TN3270E, + AARDWOLF, LINEMODE, SEND_URL, XDISPLOC, @@ -254,6 +259,11 @@ class FingerprintingServer(FingerprintingTelnetServer, TelnetServer): (GMCP, "GMCP", "Generic MUD Communication Protocol"), (MSDP, "MSDP", "MUD Server Data Protocol"), (MSSP, "MSSP", "MUD Server Status Protocol"), + (MSP, "MSP", "MUD Sound Protocol"), + (MXP, "MXP", "MUD eXtension Protocol"), + (ZMP, "ZMP", "Zenith MUD Protocol"), + (AARDWOLF, "AARDWOLF", "Aardwolf protocol"), + (ATCP, "ATCP", "Achaea Telnet Client Protocol"), ] LEGACY_OPTIONS = [ @@ -855,11 +865,23 @@ def _cooked_input(prompt: str) -> str: termios.tcsetattr(fd, termios.TCSANOW, old_attrs) +class _BytesSafeEncoder(json.JSONEncoder): + """JSON encoder that converts bytes to str (UTF-8) or hex.""" + + def default(self, o: Any) -> Any: + if isinstance(o, bytes): + try: + return o.decode("utf-8") + 
except UnicodeDecodeError: + return o.hex() + return super().default(o) + + def _atomic_json_write(filepath: str, data: dict[str, Any]) -> None: """Atomically write JSON data to file via write-to-new + rename.""" tmp_path = os.path.splitext(filepath)[0] + ".json.new" with open(tmp_path, "w", encoding="utf-8") as f: - json.dump(data, f, indent=2, sort_keys=True) + json.dump(data, f, indent=2, sort_keys=True, cls=_BytesSafeEncoder) os.replace(tmp_path, filepath) @@ -902,6 +924,8 @@ def _build_session_fingerprint( rejected = _collect_rejected_options(writer) if rejected: result["rejected"] = rejected + if writer.comport_data: + result["comport"] = writer.comport_data return result @@ -980,7 +1004,8 @@ def _is_maybe_mud(writer: Union[TelnetWriter, TelnetWriterUnicode]) -> bool: for key in ("ttype1", "ttype2", "ttype3"): if (writer.get_extra_info(key) or "").lower() in MUD_TERMINALS: return True - if writer.remote_option.enabled(GMCP) or writer.remote_option.enabled(MSDP): + mud_opts = (GMCP, MSDP, MXP, MSP, ATCP, AARDWOLF) + if any(writer.remote_option.enabled(opt) for opt in mud_opts): return True return False diff --git a/telnetlib3/mud.py b/telnetlib3/mud.py index 2afde5dc..f6afd6c2 100644 --- a/telnetlib3/mud.py +++ b/telnetlib3/mud.py @@ -5,6 +5,9 @@ - GMCP (Generic MUD Communication Protocol, option 201) - MSDP (MUD Server Data Protocol, option 69) - MSSP (MUD Server Status Protocol, option 70) +- ZMP (Zenith MUD Protocol, option 93) +- ATCP (Achaea Telnet Client Protocol, option 200) +- AARDWOLF (Aardwolf protocol, option 102) All encode functions return the payload bytes only (the content between ``IAC SB