diff --git a/docs/api/color_filter.rst b/docs/api/color_filter.rst new file mode 100644 index 00000000..79df2de3 --- /dev/null +++ b/docs/api/color_filter.rst @@ -0,0 +1,7 @@ +color_filter +------------ + +.. automodule:: telnetlib3.color_filter + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index e0e2f610..9a66929d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -60,17 +60,18 @@ # General information about the project. project = "telnetlib3" -copyright = "2013 Jeff Quast" +import datetime +copyright = f"2013-{datetime.datetime.now().year} Jeff Quast" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = "2.3" +version = "2.4" # The full version, including alpha/beta/rc tags. -release = "2.3.0" # keep in sync with pyproject.toml and telnetlib3/accessories.py !! +release = "2.4.0" # keep in sync with pyproject.toml and telnetlib3/accessories.py !! # The language for content auto-generated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/history.rst b/docs/history.rst index 369d1c92..34d271e0 100644 --- a/docs/history.rst +++ b/docs/history.rst @@ -1,6 +1,43 @@ History ======= +2.4.0 + * new: :mod:`telnetlib3.color_filter` module — translates 16-color ANSI SGR + codes to 24-bit RGB from hardware palettes (EGA, CGA, VGA, Amiga, xterm). + Enabled by default. New client CLI options: ``--colormatch``, + ``--color-brightness``, ``--color-contrast``, ``--background-color``, + ``--reverse-video``. + * new: :func:`~telnetlib3.mud.zmp_decode`, + :func:`~telnetlib3.mud.atcp_decode`, and + :func:`~telnetlib3.mud.aardwolf_decode` decode functions for ZMP (option + 93), ATCP (option 200), and Aardwolf (option 102) MUD protocols. 
+ * new: :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_zmp`, + :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_atcp`, + :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_aardwolf`, + :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_msp`, and + :meth:`~telnetlib3.stream_writer.TelnetWriter.handle_mxp` callbacks for + receiving MUD extended protocol subnegotiations, with accumulated data + stored in ``zmp_data``, ``atcp_data``, and ``aardwolf_data`` attributes. + * new: COM-PORT-OPTION (:rfc:`2217`) subnegotiation parsing with + ``comport_data`` attribute and + :meth:`~telnetlib3.stream_writer.TelnetWriter.request_comport_signature`. + * enhancement: ``telnetlib3-fingerprint`` now always probes extended MUD + options (MSP, MXP, ZMP, AARDWOLF, ATCP) during server scans and captures + ZMP, ATCP, Aardwolf, MXP, and COM-PORT data in session output. + * enhancement: ``telnetlib3-fingerprint`` smart prompt detection — + auto-answers yes/no, color, UTF-8 menu, ``who``, and ``help`` prompts. + * enhancement: ``--banner-max-bytes`` option for ``telnetlib3-fingerprint``; + default raised from 1024 to 65536. + * enhancement: new ``--encoding=petscii`` and ``--encoding=atarist`` codecs. + * bugfix: rare LINEMODE ACK loop with misbehaving servers that re-send + unchanged MODE without ACK. + * bugfix: unknown IAC commands no longer raise ``ValueError``; treated as + data. + * bugfix: client no longer asserts on ``TTYPE IS`` from server. + * bugfix: ``request_forwardmask()`` only called on server side. + * change: ``wcwidth`` is now a required dependency. + + 2.3.0 * bugfix: repeat "socket.send() raised exception."
exceptions * bugfix: server incorrectly accepted ``DO TSPEED`` and ``DO SNDLOC`` diff --git a/pyproject.toml b/pyproject.toml index 124f2bed..80c0d4c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "telnetlib3" -version = "2.3.0" +version = "2.4.0" description = " Python Telnet server and client CLI and Protocol library" readme = "README.rst" license = "ISC" @@ -43,6 +43,9 @@ classifiers = [ "Topic :: Terminals :: Telnet", ] requires-python = ">=3.9" +dependencies = [ + "wcwidth>=0.2.13", +] [project.optional-dependencies] docs = [ diff --git a/telnetlib3/__init__.py b/telnetlib3/__init__.py index 4b530fd2..24183b8c 100644 --- a/telnetlib3/__init__.py +++ b/telnetlib3/__init__.py @@ -16,6 +16,7 @@ from . import stream_reader from . import client_base from . import client_shell +from . import color_filter from . import client from . import telopt from . import mud @@ -26,6 +27,7 @@ from . import server_fingerprinting if sys.platform != "win32": from . import fingerprinting_display # noqa: F401 +from . import encodings # noqa: F401 - registers custom codecs (petscii, atarist) from . 
import sync from .server_base import * # noqa from .server import * # noqa diff --git a/telnetlib3/client.py b/telnetlib3/client.py index cd574667..4c2a3e06 100755 --- a/telnetlib3/client.py +++ b/telnetlib3/client.py @@ -528,11 +528,48 @@ def _patched_connection_made(transport: asyncio.BaseTransport) -> None: client_factory = _client_factory + # Wrap the shell callback to inject color filter when enabled + colormatch: str = args["colormatch"] + shell_callback = args["shell"] + if colormatch.lower() != "none": + # local + from .color_filter import ( # pylint: disable=import-outside-toplevel + PALETTES, + ColorConfig, + ColorFilter, + ) + + if colormatch not in PALETTES: + print( + f"Unknown palette {colormatch!r}," f" available: {', '.join(sorted(PALETTES))}", + file=sys.stderr, + ) + sys.exit(1) + color_config = ColorConfig( + palette_name=colormatch, + brightness=args["color_brightness"], + contrast=args["color_contrast"], + background_color=args["background_color"], + reverse_video=args["reverse_video"], + ) + color_filter = ColorFilter(color_config) + original_shell = shell_callback + + async def _color_shell( + reader: Union[TelnetReader, TelnetReaderUnicode], + writer_arg: Union[TelnetWriter, TelnetWriterUnicode], + ) -> None: + # pylint: disable-next=protected-access + writer_arg._color_filter = color_filter # type: ignore[union-attr] + await original_shell(reader, writer_arg) + + shell_callback = _color_shell + # Build connection kwargs explicitly to avoid pylint false positive connection_kwargs: Dict[str, Any] = { "encoding": args["encoding"], "tspeed": args["tspeed"], - "shell": args["shell"], + "shell": shell_callback, "term": args["term"], "force_binary": args["force_binary"], "encoding_errors": args["encoding_errors"], @@ -607,6 +644,43 @@ def _get_argument_parser() -> argparse.ArgumentParser: metavar="OPT", help="always send DO for this option (name like GMCP or number, repeatable)", ) + parser.add_argument( + "--colormatch", + default="ega", + 
def _parse_background_color(value: str) -> Tuple[int, int, int]:
    """
    Parse hex color string to RGB tuple.

    Every character after the optional leading ``#`` must be a hexadecimal
    digit.  The digits are checked explicitly before conversion because
    ``int(text, 16)`` also accepts a leading sign and surrounding whitespace,
    which would otherwise let input such as ``"-1-1-1"`` produce negative
    channel values.

    :param value: Color string like ``"#RRGGBB"`` or ``"RRGGBB"``.
    :returns: (R, G, B) tuple with values 0-255.
    :raises ValueError: When *value* is not a valid hex color.
    """
    digits = value.lstrip("#")
    if len(digits) != 6 or not all(c in "0123456789abcdefABCDEF" for c in digits):
        raise ValueError(f"invalid hex color: {value!r}")
    # Two hex digits per channel, in R, G, B order.
    red, green, blue = (int(digits[i : i + 2], 16) for i in (0, 2, 4))
    return (red, green, blue)
self.waiter_closed.set_result(weakref.proxy(self)) @@ -200,6 +200,7 @@ def begin_shell(self, future: asyncio.Future[None]) -> None: lambda fut_obj: ( self.waiter_closed.set_result(weakref.proxy(self)) if self.waiter_closed is not None + and not self.waiter_closed.done() else None ) ) diff --git a/telnetlib3/client_shell.py b/telnetlib3/client_shell.py index 39532fe8..f63516a0 100644 --- a/telnetlib3/client_shell.py +++ b/telnetlib3/client_shell.py @@ -242,6 +242,11 @@ def _on_winch() -> None: if telnet_task in wait_for: telnet_task.cancel() wait_for.remove(telnet_task) + _cf = getattr(telnet_writer, "_color_filter", None) + if _cf is not None: + _flush = _cf.flush() + if _flush: + stdout.write(_flush.encode()) stdout.write(f"\033[m{linesep}Connection closed.{linesep}".encode()) # Cleanup resize handler on local escape close if term._istty and remove_winch: # pylint: disable=protected-access @@ -273,6 +278,11 @@ def _on_winch() -> None: if stdin_task in wait_for: stdin_task.cancel() wait_for.remove(stdin_task) + _cf = getattr(telnet_writer, "_color_filter", None) + if _cf is not None: + _flush = _cf.flush() + if _flush: + stdout.write(_flush.encode()) stdout.write( f"\033[m{linesep}Connection closed by foreign host.{linesep}".encode() ) @@ -289,6 +299,9 @@ def _on_winch() -> None: except Exception: # pylint: disable=broad-exception-caught pass else: + _cf = getattr(telnet_writer, "_color_filter", None) + if _cf is not None: + out = _cf.filter(out) stdout.write(out.encode() or b":?!?:") telnet_task = accessories.make_reader_task(telnet_reader, size=2**24) wait_for.add(telnet_task) diff --git a/telnetlib3/color_filter.py b/telnetlib3/color_filter.py new file mode 100644 index 00000000..4120c3a7 --- /dev/null +++ b/telnetlib3/color_filter.py @@ -0,0 +1,431 @@ +""" +ANSI color palette translation for telnet client output. + +Most modern terminals use custom palette colors for ANSI colors 0-15 (e.g. +Solarized, Dracula, Gruvbox themes). 
When connecting to MUDs and BBS systems, +the artwork and text colors were designed for specific hardware palettes such as +IBM EGA, VGA, or Amiga. The terminal's custom palette distorts the intended +colors, often ruining ANSI artwork. + +By translating basic 16-color SGR codes into their exact 24-bit RGB equivalents +from named hardware palettes, we bypass the terminal's palette entirely and +display the colors the artist intended. + +This feature is enabled by default using the EGA palette. Use +``--colormatch=none`` on the ``telnetlib3-client`` command line to disable it. + +Example usage:: + + # Default EGA palette with brightness/contrast adjustment + telnetlib3-client mud.example.com 4000 + + # Use VGA palette instead + telnetlib3-client --colormatch=vga mud.example.com + + # Disable color translation entirely + telnetlib3-client --colormatch=none mud.example.com + + # Custom brightness and contrast + telnetlib3-client --color-brightness=0.7 --color-contrast=0.6 mud.example.com + + # White-background terminal (reverse video) + telnetlib3-client --reverse-video mud.example.com +""" + +from __future__ import annotations + +# std imports +import re +from typing import Dict, List, Match, Tuple, Optional, NamedTuple + +# 3rd party +from wcwidth.sgr_state import _SGR_PATTERN + +__all__ = ("ColorConfig", "ColorFilter", "PALETTES") + +# Type alias for a 16-color palette: 16 (R, G, B) tuples indexed 0-15. +# Index 0-7: normal colors (black, red, green, yellow, blue, magenta, cyan, white) +# Index 8-15: bright variants of the same order. +PaletteRGB = Tuple[ + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], + Tuple[int, int, int], +] + +# Hardware color palettes. 
Each defines exact RGB values for ANSI colors 0-15. +PALETTES: Dict[str, PaletteRGB] = { + # IBM Enhanced Graphics Adapter -- the classic DOS palette used by most + # BBS and MUD ANSI artwork. + "ega": ( + (0, 0, 0), + (170, 0, 0), + (0, 170, 0), + (170, 85, 0), + (0, 0, 170), + (170, 0, 170), + (0, 170, 170), + (170, 170, 170), + (85, 85, 85), + (255, 85, 85), + (85, 255, 85), + (255, 255, 85), + (85, 85, 255), + (255, 85, 255), + (85, 255, 255), + (255, 255, 255), + ), + # IBM Color Graphics Adapter -- earlier, more saturated palette. + "cga": ( + (0, 0, 0), + (170, 0, 0), + (0, 170, 0), + (170, 170, 0), + (0, 0, 170), + (170, 0, 170), + (0, 170, 170), + (170, 170, 170), + (85, 85, 85), + (255, 85, 85), + (85, 255, 85), + (255, 255, 85), + (85, 85, 255), + (255, 85, 255), + (85, 255, 255), + (255, 255, 255), + ), + # VGA / DOS standard palette -- the most common DOS palette, very close + # to EGA but with a brighter dark yellow. + "vga": ( + (0, 0, 0), + (170, 0, 0), + (0, 170, 0), + (170, 85, 0), + (0, 0, 170), + (170, 0, 170), + (0, 170, 170), + (170, 170, 170), + (85, 85, 85), + (255, 85, 85), + (85, 255, 85), + (255, 255, 85), + (85, 85, 255), + (255, 85, 255), + (85, 255, 255), + (255, 255, 255), + ), + # Amiga Workbench 1.x palette -- warmer tones characteristic of the + # Commodore Amiga. + "amiga": ( + (0, 0, 0), + (170, 0, 0), + (0, 170, 0), + (170, 170, 0), + (0, 0, 170), + (170, 0, 170), + (0, 170, 170), + (187, 187, 187), + (85, 85, 85), + (255, 85, 85), + (85, 255, 85), + (255, 255, 85), + (85, 85, 255), + (255, 85, 255), + (85, 255, 255), + (255, 255, 255), + ), + # xterm default palette -- the standard xterm color table. 
# Detect potentially incomplete escape sequence at end of a chunk.  ``\Z`` is
# used instead of ``$`` because ``$`` also matches just before a trailing
# newline, which would make ColorFilter.filter() silently drop that newline.
_TRAILING_ESC = re.compile(r"\x1b(\[[\d;:]*)?\Z")


class ColorConfig(NamedTuple):
    """
    Configuration for ANSI color palette translation.

    :param palette_name: Name of the hardware palette to use (key in PALETTES).
    :param brightness: Brightness scale factor [0.0..1.0], where 1.0 is original.
    :param contrast: Contrast scale factor [0.0..1.0], where 1.0 is original.
    :param background_color: Forced background RGB as (R, G, B) tuple.
    :param reverse_video: When True, swap fg/bg for light-background terminals.
    """

    palette_name: str = "ega"
    brightness: float = 0.9
    contrast: float = 0.8
    background_color: Tuple[int, int, int] = (16, 16, 16)
    reverse_video: bool = False


def _sgr_code_to_palette_index(code: int) -> Optional[int]:
    """
    Map a basic SGR color code to a palette index (0-15).

    :param code: SGR parameter value (30-37, 40-47, 90-97, or 100-107).
    :returns: Palette index 0-15, or None if not a basic color code.
    """
    if 30 <= code <= 37:
        return code - 30
    if 40 <= code <= 47:
        return code - 40
    if 90 <= code <= 97:
        return code - 90 + 8
    if 100 <= code <= 107:
        return code - 100 + 8
    return None


def _is_foreground_code(code: int) -> bool:
    """
    Return True if *code* is a foreground color SGR parameter.

    :param code: SGR parameter value.
    :returns: True for foreground codes (30-37, 90-97).
    """
    return (30 <= code <= 37) or (90 <= code <= 97)


def _adjust_color(
    r: int, g: int, b: int, brightness: float, contrast: float
) -> Tuple[int, int, int]:
    """
    Apply brightness and contrast scaling to an RGB color.

    Brightness scales linearly toward black (0.0 = black, 1.0 = original).
    Contrast scales linearly toward mid-gray (0.0 = flat gray, 1.0 = original).
    Result is clamped to 0-255.

    :param r: Red channel (0-255).
    :param g: Green channel (0-255).
    :param b: Blue channel (0-255).
    :param brightness: Brightness factor [0.0..1.0].
    :param contrast: Contrast factor [0.0..1.0].
    :returns: Adjusted (R, G, B) tuple.
    """
    mid = 127.5
    r_f = mid + (r * brightness - mid) * contrast
    g_f = mid + (g * brightness - mid) * contrast
    b_f = mid + (b * brightness - mid) * contrast
    return (
        max(0, min(255, int(r_f + 0.5))),
        max(0, min(255, int(g_f + 0.5))),
        max(0, min(255, int(b_f + 0.5))),
    )


class ColorFilter:
    """
    Stateful ANSI color palette translation filter.

    Translates basic 16-color ANSI SGR codes to 24-bit RGB equivalents from a named hardware
    palette, with brightness/contrast adjustment and background color enforcement.

    The filter is designed to process chunked text (as received from a telnet connection) and
    correctly handles escape sequences split across chunk boundaries.

    Bold state is tracked across sequences so that a basic foreground color (30-37) selected
    while bold is active uses the bright palette entry.  Colors already emitted are not
    re-written when bold is toggled by a later sequence.

    :param config: Color configuration parameters.
    """

    def __init__(self, config: ColorConfig) -> None:
        """Initialize with the given color configuration."""
        self._config = config
        palette = PALETTES[config.palette_name]
        self._adjusted: List[Tuple[int, int, int]] = [
            _adjust_color(r, g, b, config.brightness, config.contrast) for r, g, b in palette
        ]
        bg = config.background_color
        if config.reverse_video:
            bg = (255 - bg[0], 255 - bg[1], 255 - bg[2])
        self._bg_sgr = f"\x1b[48;2;{bg[0]};{bg[1]};{bg[2]}m"
        self._buffer = ""
        self._initial = True
        self._bold = False

    def filter(self, text: str) -> str:
        """
        Transform SGR sequences in *text* using the configured palette.

        Handles chunked input by buffering incomplete trailing escape sequences across calls. On
        the very first non-empty output, the configured background color is injected.

        :param text: Input text, possibly containing ANSI escape sequences.
        :returns: Text with basic colors replaced by 24-bit RGB equivalents.
        """
        if self._buffer:
            text = self._buffer + text
            self._buffer = ""

        # Hold back a possibly unfinished escape sequence until the next chunk.
        match = _TRAILING_ESC.search(text)
        if match:
            self._buffer = match.group()
            text = text[: match.start()]

        if not text:
            return ""

        result = _SGR_PATTERN.sub(self._replace_sgr, text)

        if self._initial:
            self._initial = False
            result = self._bg_sgr + result
        return result

    @staticmethod
    def _param_value(part: str) -> Optional[int]:
        """
        Return the integer value of one SGR parameter.

        :param part: Raw parameter text; an empty string counts as 0.
        :returns: Parsed value, or None when *part* is not an integer.
        """
        if not part:
            return 0
        try:
            return int(part)
        except ValueError:
            return None

    @staticmethod
    def _extended_color_end(parts: List[str], i: int) -> int:
        """
        Return the index just past the extended color spec starting at ``parts[i]``.

        ``parts[i]`` is 38 or 48.  ``38;5;N`` consumes three parameters and
        ``38;2;R;G;B`` consumes five; a truncated or unknown form consumes only
        the parameters that are present for the introducer and mode.

        :param parts: All parameters of the SGR sequence.
        :param i: Index of the 38/48 introducer.
        :returns: Index of the first parameter after the extended color spec.
        """
        i += 1
        if i < len(parts):
            try:
                mode = int(parts[i]) if parts[i] else 0
            except ValueError:
                mode = 0
            i += 1
            if mode == 5 and i < len(parts):
                i += 1
            elif mode == 2 and i + 2 < len(parts):
                i += 3
        return i

    def _bold_after(self, parts: List[str]) -> bool:
        """
        Return the bold state in effect once all of *parts* have been applied.

        Starts from the persistent bold state; 1 turns bold on, 0 and 22 turn it
        off.  Arguments of extended colors (``38;5;1``, ``38;2;1;1;1``) are skipped
        so that a color index or channel value of 1 is not mistaken for bold.

        :param parts: All parameters of the SGR sequence.
        :returns: Bold state after the sequence.
        """
        bold = self._bold
        i = 0
        while i < len(parts):
            p = self._param_value(parts[i])
            if p in (38, 48):
                i = self._extended_color_end(parts, i)
                continue
            if p == 1:
                bold = True
            elif p in (0, 22):
                bold = False
            i += 1
        return bold

    def _replace_sgr(self, match: Match[str]) -> str:
        r"""
        Regex replacement callback for a single SGR sequence.

        Tracks bold state across calls so that ``\x1b[1;30m`` (bold + black) uses the bright palette
        entry (index 8) instead of pure black. This preserves the traditional "bold as bright"
        rendering that legacy systems rely on, which would otherwise be lost when converting to
        24-bit RGB (terminals do not brighten true-color values for bold).

        The bold state used for color lookups is the one in effect at the *end* of the sequence,
        so ``\x1b[31;1m`` and ``\x1b[0;1;31m`` both select bright red.  The forced background is
        emitted directly after each reset, before the remaining parameters, so that an explicit
        background in the same sequence (``\x1b[0;44m``) is not overwritten.

        :param match: Match of one SGR sequence; group 1 holds the parameter string.
        :returns: Replacement escape sequence(s).
        """
        params_str = match.group(1)

        # Empty params (bare ESC[m) -> reset
        if not params_str:
            self._bold = False
            return f"\x1b[0m{self._bg_sgr}"

        # Colon-separated extended colors (ITU T.416) -- pass through unchanged
        if ":" in params_str:
            return match.group()

        parts = params_str.split(";")
        bold = self._bold_after(parts)

        segments: List[str] = []  # completed escape sequences, in output order
        pending: List[str] = []  # parameters of the sequence being built
        i = 0
        while i < len(parts):
            p = self._param_value(parts[i])
            if p is None:
                pending.append(parts[i])
                i += 1
                continue

            if p == 0:
                # Close the sequence at the reset and re-apply the forced
                # background immediately, ahead of any later parameters.
                pending.append("0")
                segments.append(f"\x1b[{';'.join(pending)}m{self._bg_sgr}")
                pending = []
                i += 1
                continue

            # Extended color -- pass through 38;5;N or 38;2;R;G;B verbatim
            if p in (38, 48):
                end = self._extended_color_end(parts, i)
                pending.extend(parts[i:end])
                i = end
                continue

            idx = _sgr_code_to_palette_index(p)
            if idx is None:
                # Bold (1), normal intensity (22), default fg/bg (39/49) and
                # every other attribute pass through.
                pending.append(str(p))
            else:
                is_fg = _is_foreground_code(p)
                # Bold-as-bright: promote normal fg 30-37 to bright 8-15
                if is_fg and bold and 30 <= p <= 37:
                    idx += 8
                r, g, b = self._adjusted[idx]
                if self._config.reverse_video:
                    is_fg = not is_fg
                selector = "38" if is_fg else "48"
                pending.extend([selector, "2", str(r), str(g), str(b)])
            i += 1

        # Update persistent bold state for subsequent sequences
        self._bold = bold

        if pending:
            segments.append(f"\x1b[{';'.join(pending)}m")
        return "".join(segments)

    def flush(self) -> str:
        """
        Flush any buffered partial escape sequence.

        Call this when the stream closes to emit any remaining buffered bytes.

        :returns: Buffered content (may be an incomplete escape sequence).
        """
        result = self._buffer
        self._buffer = ""
        return result
+ +Generated from ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT +""" +# pylint: disable=redefined-builtin + +# std imports +import codecs + + +class Codec(codecs.Codec): + """Atari ST character map codec.""" + + def encode(self, input, errors='strict'): + """Encode input string using Atari ST character map.""" + return codecs.charmap_encode(input, errors, ENCODING_TABLE) + + def decode(self, input, errors='strict'): + """Decode input bytes using Atari ST character map.""" + return codecs.charmap_decode(input, errors, DECODING_TABLE) + + +class IncrementalEncoder(codecs.IncrementalEncoder): + """Atari ST incremental encoder.""" + + def encode(self, input, final=False): + """Encode input string incrementally.""" + return codecs.charmap_encode(input, self.errors, ENCODING_TABLE)[0] + + +class IncrementalDecoder(codecs.IncrementalDecoder): + """Atari ST incremental decoder.""" + + def decode(self, input, final=False): + """Decode input bytes incrementally.""" + return codecs.charmap_decode(input, self.errors, DECODING_TABLE)[0] + + +class StreamWriter(Codec, codecs.StreamWriter): + """Atari ST stream writer.""" + + +class StreamReader(Codec, codecs.StreamReader): + """Atari ST stream reader.""" + + +def getaliases(): + """Return codec aliases.""" + return ('atari',) + + +def getregentry(): + """Return the codec registry entry.""" + return codecs.CodecInfo( + name='atarist', + encode=Codec().encode, + decode=Codec().decode, + incrementalencoder=IncrementalEncoder, + incrementaldecoder=IncrementalDecoder, + streamreader=StreamReader, + streamwriter=StreamWriter, + ) + + +# Decoding Table + +DECODING_TABLE = ( + '\x00' # 0x00 -> NULL + '\x01' # 0x01 -> START OF HEADING + '\x02' # 0x02 -> START OF TEXT + '\x03' # 0x03 -> END OF TEXT + '\x04' # 0x04 -> END OF TRANSMISSION + '\x05' # 0x05 -> ENQUIRY + '\x06' # 0x06 -> ACKNOWLEDGE + '\x07' # 0x07 -> BELL + '\x08' # 0x08 -> BACKSPACE + '\t' # 0x09 -> HORIZONTAL TABULATION + '\n' # 0x0A -> LINE FEED + '\x0b' # 
0x0B -> VERTICAL TABULATION + '\x0c' # 0x0C -> FORM FEED + '\r' # 0x0D -> CARRIAGE RETURN + '\x0e' # 0x0E -> SHIFT OUT + '\x0f' # 0x0F -> SHIFT IN + '\x10' # 0x10 -> DATA LINK ESCAPE + '\x11' # 0x11 -> DEVICE CONTROL ONE + '\x12' # 0x12 -> DEVICE CONTROL TWO + '\x13' # 0x13 -> DEVICE CONTROL THREE + '\x14' # 0x14 -> DEVICE CONTROL FOUR + '\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE + '\x16' # 0x16 -> SYNCHRONOUS IDLE + '\x17' # 0x17 -> END OF TRANSMISSION BLOCK + '\x18' # 0x18 -> CANCEL + '\x19' # 0x19 -> END OF MEDIUM + '\x1a' # 0x1A -> SUBSTITUTE + '\x1b' # 0x1B -> ESCAPE + '\x1c' # 0x1C -> FILE SEPARATOR + '\x1d' # 0x1D -> GROUP SEPARATOR + '\x1e' # 0x1E -> RECORD SEPARATOR + '\x1f' # 0x1F -> UNIT SEPARATOR + ' ' # 0x20 -> SPACE + '!' # 0x21 -> EXCLAMATION MARK + '"' # 0x22 -> QUOTATION MARK + '#' # 0x23 -> NUMBER SIGN + '$' # 0x24 -> DOLLAR SIGN + '%' # 0x25 -> PERCENT SIGN + '&' # 0x26 -> AMPERSAND + "'" # 0x27 -> APOSTROPHE + '(' # 0x28 -> LEFT PARENTHESIS + ')' # 0x29 -> RIGHT PARENTHESIS + '*' # 0x2A -> ASTERISK + '+' # 0x2B -> PLUS SIGN + ',' # 0x2C -> COMMA + '-' # 0x2D -> HYPHEN-MINUS + '.' # 0x2E -> FULL STOP + '/' # 0x2F -> SOLIDUS + '0' # 0x30 -> DIGIT ZERO + '1' # 0x31 -> DIGIT ONE + '2' # 0x32 -> DIGIT TWO + '3' # 0x33 -> DIGIT THREE + '4' # 0x34 -> DIGIT FOUR + '5' # 0x35 -> DIGIT FIVE + '6' # 0x36 -> DIGIT SIX + '7' # 0x37 -> DIGIT SEVEN + '8' # 0x38 -> DIGIT EIGHT + '9' # 0x39 -> DIGIT NINE + ':' # 0x3A -> COLON + ';' # 0x3B -> SEMICOLON + '<' # 0x3C -> LESS-THAN SIGN + '=' # 0x3D -> EQUALS SIGN + '>' # 0x3E -> GREATER-THAN SIGN + '?' 
# 0x3F -> QUESTION MARK + '@' # 0x40 -> COMMERCIAL AT + 'A' # 0x41 -> LATIN CAPITAL LETTER A + 'B' # 0x42 -> LATIN CAPITAL LETTER B + 'C' # 0x43 -> LATIN CAPITAL LETTER C + 'D' # 0x44 -> LATIN CAPITAL LETTER D + 'E' # 0x45 -> LATIN CAPITAL LETTER E + 'F' # 0x46 -> LATIN CAPITAL LETTER F + 'G' # 0x47 -> LATIN CAPITAL LETTER G + 'H' # 0x48 -> LATIN CAPITAL LETTER H + 'I' # 0x49 -> LATIN CAPITAL LETTER I + 'J' # 0x4A -> LATIN CAPITAL LETTER J + 'K' # 0x4B -> LATIN CAPITAL LETTER K + 'L' # 0x4C -> LATIN CAPITAL LETTER L + 'M' # 0x4D -> LATIN CAPITAL LETTER M + 'N' # 0x4E -> LATIN CAPITAL LETTER N + 'O' # 0x4F -> LATIN CAPITAL LETTER O + 'P' # 0x50 -> LATIN CAPITAL LETTER P + 'Q' # 0x51 -> LATIN CAPITAL LETTER Q + 'R' # 0x52 -> LATIN CAPITAL LETTER R + 'S' # 0x53 -> LATIN CAPITAL LETTER S + 'T' # 0x54 -> LATIN CAPITAL LETTER T + 'U' # 0x55 -> LATIN CAPITAL LETTER U + 'V' # 0x56 -> LATIN CAPITAL LETTER V + 'W' # 0x57 -> LATIN CAPITAL LETTER W + 'X' # 0x58 -> LATIN CAPITAL LETTER X + 'Y' # 0x59 -> LATIN CAPITAL LETTER Y + 'Z' # 0x5A -> LATIN CAPITAL LETTER Z + '[' # 0x5B -> LEFT SQUARE BRACKET + '\\' # 0x5C -> REVERSE SOLIDUS + ']' # 0x5D -> RIGHT SQUARE BRACKET + '^' # 0x5E -> CIRCUMFLEX ACCENT + '_' # 0x5F -> LOW LINE + '`' # 0x60 -> GRAVE ACCENT + 'a' # 0x61 -> LATIN SMALL LETTER A + 'b' # 0x62 -> LATIN SMALL LETTER B + 'c' # 0x63 -> LATIN SMALL LETTER C + 'd' # 0x64 -> LATIN SMALL LETTER D + 'e' # 0x65 -> LATIN SMALL LETTER E + 'f' # 0x66 -> LATIN SMALL LETTER F + 'g' # 0x67 -> LATIN SMALL LETTER G + 'h' # 0x68 -> LATIN SMALL LETTER H + 'i' # 0x69 -> LATIN SMALL LETTER I + 'j' # 0x6A -> LATIN SMALL LETTER J + 'k' # 0x6B -> LATIN SMALL LETTER K + 'l' # 0x6C -> LATIN SMALL LETTER L + 'm' # 0x6D -> LATIN SMALL LETTER M + 'n' # 0x6E -> LATIN SMALL LETTER N + 'o' # 0x6F -> LATIN SMALL LETTER O + 'p' # 0x70 -> LATIN SMALL LETTER P + 'q' # 0x71 -> LATIN SMALL LETTER Q + 'r' # 0x72 -> LATIN SMALL LETTER R + 's' # 0x73 -> LATIN SMALL LETTER S + 't' # 0x74 -> LATIN SMALL LETTER 
T + 'u' # 0x75 -> LATIN SMALL LETTER U + 'v' # 0x76 -> LATIN SMALL LETTER V + 'w' # 0x77 -> LATIN SMALL LETTER W + 'x' # 0x78 -> LATIN SMALL LETTER X + 'y' # 0x79 -> LATIN SMALL LETTER Y + 'z' # 0x7A -> LATIN SMALL LETTER Z + '{' # 0x7B -> LEFT CURLY BRACKET + '|' # 0x7C -> VERTICAL LINE + '}' # 0x7D -> RIGHT CURLY BRACKET + '~' # 0x7E -> TILDE + '\x7f' # 0x7F -> DELETE + '\xc7' # 0x80 -> LATIN CAPITAL LETTER C WITH CEDILLA + '\xfc' # 0x81 -> LATIN SMALL LETTER U WITH DIAERESIS + '\xe9' # 0x82 -> LATIN SMALL LETTER E WITH ACUTE + '\xe2' # 0x83 -> LATIN SMALL LETTER A WITH CIRCUMFLEX + '\xe4' # 0x84 -> LATIN SMALL LETTER A WITH DIAERESIS + '\xe0' # 0x85 -> LATIN SMALL LETTER A WITH GRAVE + '\xe5' # 0x86 -> LATIN SMALL LETTER A WITH RING ABOVE + '\xe7' # 0x87 -> LATIN SMALL LETTER C WITH CEDILLA + '\xea' # 0x88 -> LATIN SMALL LETTER E WITH CIRCUMFLEX + '\xeb' # 0x89 -> LATIN SMALL LETTER E WITH DIAERESIS + '\xe8' # 0x8A -> LATIN SMALL LETTER E WITH GRAVE + '\xef' # 0x8B -> LATIN SMALL LETTER I WITH DIAERESIS + '\xee' # 0x8C -> LATIN SMALL LETTER I WITH CIRCUMFLEX + '\xec' # 0x8D -> LATIN SMALL LETTER I WITH GRAVE + '\xc4' # 0x8E -> LATIN CAPITAL LETTER A WITH DIAERESIS + '\xc5' # 0x8F -> LATIN CAPITAL LETTER A WITH RING ABOVE + '\xc9' # 0x90 -> LATIN CAPITAL LETTER E WITH ACUTE + '\xe6' # 0x91 -> LATIN SMALL LETTER AE + '\xc6' # 0x92 -> LATIN CAPITAL LETTER AE + '\xf4' # 0x93 -> LATIN SMALL LETTER O WITH CIRCUMFLEX + '\xf6' # 0x94 -> LATIN SMALL LETTER O WITH DIAERESIS + '\xf2' # 0x95 -> LATIN SMALL LETTER O WITH GRAVE + '\xfb' # 0x96 -> LATIN SMALL LETTER U WITH CIRCUMFLEX + '\xf9' # 0x97 -> LATIN SMALL LETTER U WITH GRAVE + '\xff' # 0x98 -> LATIN SMALL LETTER Y WITH DIAERESIS + '\xd6' # 0x99 -> LATIN CAPITAL LETTER O WITH DIAERESIS + '\xdc' # 0x9A -> LATIN CAPITAL LETTER U WITH DIAERESIS + '\xa2' # 0x9B -> CENT SIGN + '\xa3' # 0x9C -> POUND SIGN + '\xa5' # 0x9D -> YEN SIGN + '\xdf' # 0x9E -> LATIN SMALL LETTER SHARP S + '\u0192' # 0x9F -> LATIN SMALL LETTER F WITH 
HOOK + '\xe1' # 0xA0 -> LATIN SMALL LETTER A WITH ACUTE + '\xed' # 0xA1 -> LATIN SMALL LETTER I WITH ACUTE + '\xf3' # 0xA2 -> LATIN SMALL LETTER O WITH ACUTE + '\xfa' # 0xA3 -> LATIN SMALL LETTER U WITH ACUTE + '\xf1' # 0xA4 -> LATIN SMALL LETTER N WITH TILDE + '\xd1' # 0xA5 -> LATIN CAPITAL LETTER N WITH TILDE + '\xaa' # 0xA6 -> FEMININE ORDINAL INDICATOR + '\xba' # 0xA7 -> MASCULINE ORDINAL INDICATOR + '\xbf' # 0xA8 -> INVERTED QUESTION MARK + '\u2310' # 0xA9 -> REVERSED NOT SIGN + '\xac' # 0xAA -> NOT SIGN + '\xbd' # 0xAB -> VULGAR FRACTION ONE HALF + '\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER + '\xa1' # 0xAD -> INVERTED EXCLAMATION MARK + '\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + '\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK + '\xe3' # 0xB0 -> LATIN SMALL LETTER A WITH TILDE + '\xf5' # 0xB1 -> LATIN SMALL LETTER O WITH TILDE + '\xd8' # 0xB2 -> LATIN CAPITAL LETTER O WITH STROKE + '\xf8' # 0xB3 -> LATIN SMALL LETTER O WITH STROKE + '\u0153' # 0xB4 -> LATIN SMALL LIGATURE OE + '\u0152' # 0xB5 -> LATIN CAPITAL LIGATURE OE + '\xc0' # 0xB6 -> LATIN CAPITAL LETTER A WITH GRAVE + '\xc3' # 0xB7 -> LATIN CAPITAL LETTER A WITH TILDE + '\xd5' # 0xB8 -> LATIN CAPITAL LETTER O WITH TILDE + '\xa8' # 0xB9 -> DIAERESIS + '\xb4' # 0xBA -> ACUTE ACCENT + '\u2020' # 0xBB -> DAGGER + '\xb6' # 0xBC -> PILCROW SIGN + '\xa9' # 0xBD -> COPYRIGHT SIGN + '\xae' # 0xBE -> REGISTERED SIGN + '\u2122' # 0xBF -> TRADE MARK SIGN + '\u0133' # 0xC0 -> LATIN SMALL LIGATURE IJ + '\u0132' # 0xC1 -> LATIN CAPITAL LIGATURE IJ + '\u05d0' # 0xC2 -> HEBREW LETTER ALEF + '\u05d1' # 0xC3 -> HEBREW LETTER BET + '\u05d2' # 0xC4 -> HEBREW LETTER GIMEL + '\u05d3' # 0xC5 -> HEBREW LETTER DALET + '\u05d4' # 0xC6 -> HEBREW LETTER HE + '\u05d5' # 0xC7 -> HEBREW LETTER VAV + '\u05d6' # 0xC8 -> HEBREW LETTER ZAYIN + '\u05d7' # 0xC9 -> HEBREW LETTER HET + '\u05d8' # 0xCA -> HEBREW LETTER TET + '\u05d9' # 0xCB -> HEBREW LETTER YOD + '\u05db' # 0xCC -> HEBREW LETTER KAF + '\u05dc' # 
0xCD -> HEBREW LETTER LAMED + '\u05de' # 0xCE -> HEBREW LETTER MEM + '\u05e0' # 0xCF -> HEBREW LETTER NUN + '\u05e1' # 0xD0 -> HEBREW LETTER SAMEKH + '\u05e2' # 0xD1 -> HEBREW LETTER AYIN + '\u05e4' # 0xD2 -> HEBREW LETTER PE + '\u05e6' # 0xD3 -> HEBREW LETTER TSADI + '\u05e7' # 0xD4 -> HEBREW LETTER QOF + '\u05e8' # 0xD5 -> HEBREW LETTER RESH + '\u05e9' # 0xD6 -> HEBREW LETTER SHIN + '\u05ea' # 0xD7 -> HEBREW LETTER TAV + '\u05df' # 0xD8 -> HEBREW LETTER FINAL NUN + '\u05da' # 0xD9 -> HEBREW LETTER FINAL KAF + '\u05dd' # 0xDA -> HEBREW LETTER FINAL MEM + '\u05e3' # 0xDB -> HEBREW LETTER FINAL PE + '\u05e5' # 0xDC -> HEBREW LETTER FINAL TSADI + '\xa7' # 0xDD -> SECTION SIGN + '\u2227' # 0xDE -> LOGICAL AND + '\u221e' # 0xDF -> INFINITY + '\u03b1' # 0xE0 -> GREEK SMALL LETTER ALPHA + '\u03b2' # 0xE1 -> GREEK SMALL LETTER BETA + '\u0393' # 0xE2 -> GREEK CAPITAL LETTER GAMMA + '\u03c0' # 0xE3 -> GREEK SMALL LETTER PI + '\u03a3' # 0xE4 -> GREEK CAPITAL LETTER SIGMA + '\u03c3' # 0xE5 -> GREEK SMALL LETTER SIGMA + '\xb5' # 0xE6 -> MICRO SIGN + '\u03c4' # 0xE7 -> GREEK SMALL LETTER TAU + '\u03a6' # 0xE8 -> GREEK CAPITAL LETTER PHI + '\u0398' # 0xE9 -> GREEK CAPITAL LETTER THETA + '\u03a9' # 0xEA -> GREEK CAPITAL LETTER OMEGA + '\u03b4' # 0xEB -> GREEK SMALL LETTER DELTA + '\u222e' # 0xEC -> CONTOUR INTEGRAL + '\u03c6' # 0xED -> GREEK SMALL LETTER PHI + '\u2208' # 0xEE -> ELEMENT OF SIGN + '\u2229' # 0xEF -> INTERSECTION + '\u2261' # 0xF0 -> IDENTICAL TO + '\xb1' # 0xF1 -> PLUS-MINUS SIGN + '\u2265' # 0xF2 -> GREATER-THAN OR EQUAL TO + '\u2264' # 0xF3 -> LESS-THAN OR EQUAL TO + '\u2320' # 0xF4 -> TOP HALF INTEGRAL + '\u2321' # 0xF5 -> BOTTOM HALF INTEGRAL + '\xf7' # 0xF6 -> DIVISION SIGN + '\u2248' # 0xF7 -> ALMOST EQUAL TO + '\xb0' # 0xF8 -> DEGREE SIGN + '\u2219' # 0xF9 -> BULLET OPERATOR + '\xb7' # 0xFA -> MIDDLE DOT + '\u221a' # 0xFB -> SQUARE ROOT + '\u207f' # 0xFC -> SUPERSCRIPT LATIN SMALL LETTER N + '\xb2' # 0xFD -> SUPERSCRIPT TWO + '\xb3' # 0xFE -> SUPERSCRIPT 
THREE + '\xaf' # 0xFF -> MACRON +) + +# Encoding table +ENCODING_TABLE = codecs.charmap_build(DECODING_TABLE) diff --git a/telnetlib3/encodings/petscii.py b/telnetlib3/encodings/petscii.py new file mode 100644 index 00000000..68f11edf --- /dev/null +++ b/telnetlib3/encodings/petscii.py @@ -0,0 +1,359 @@ +""" +PETSCII (Commodore 64/128) encoding -- shifted (lowercase) mode. + +PETSCII is the character encoding used by Commodore computers (C64, C128, +VIC-20, Plus/4, etc.). This codec implements the "shifted" character set +(mixed case with lowercase at 0x41-0x5A and uppercase at 0xC1-0xDA), +which is the standard mode for BBS operation. + +Mapping sources: +- Commodore 64 Programmer's Reference Guide +- https://sta.c64.org/cbm64pet.html +- Unicode Consortium Legacy Computing Supplement (U+1FB00-U+1FBFF) + +Control codes (0x00-0x1F, 0x80-0x9F) decode to the control character with the +same code point, except 0x8D (SHIFT-RETURN), which decodes to carriage return. +Graphics characters use the closest available Unicode approximations from +the Box Drawing, Block Elements, and Geometric Shapes blocks. +""" + +# std imports +import codecs + +# Decoding Table -- PETSCII shifted (lowercase) mode, 256 entries. 
#
# 0x00-0x1F : C64 control codes
# 0x20-0x3F : ASCII compatible (digits, punctuation)
# 0x40-0x5F : @, lowercase a-z, [, pound, ], up-arrow, left-arrow
# 0x60-0x7F : graphics characters
# 0x80-0x9F : C64 control codes (colors, function keys)
# 0xA0-0xBF : graphics characters (shifted)
# 0xC0-0xDF : graphics (0xC0) + uppercase A-Z (0xC1-0xDA) + graphics
# 0xE0-0xFE : graphics characters (same as 0xA0-0xBE)
# 0xFF      : pi
#
# The table is many-to-one: several bytes decode to the same character
# (0x0D/0x8D -> CR, 0x7E/0xDE/0xFF -> pi, 0xA0-0xBE repeated at 0xE0-0xFE).

DECODING_TABLE = (
    # 0x00-0x1F: Control codes
    '\x00'  # 0x00 NUL
    '\x01'  # 0x01 (unused)
    '\x02'  # 0x02 (unused)
    '\x03'  # 0x03 RUN/STOP
    '\x04'  # 0x04 (unused)
    '\x05'  # 0x05 WHT (white)
    '\x06'  # 0x06 (unused)
    '\x07'  # 0x07 BEL
    '\x08'  # 0x08 shift-disable
    '\x09'  # 0x09 shift-enable
    '\n'  # 0x0A LF
    '\x0b'  # 0x0B (unused)
    '\x0c'  # 0x0C (unused)
    '\r'  # 0x0D RETURN
    '\x0e'  # 0x0E lowercase charset
    '\x0f'  # 0x0F (unused)
    '\x10'  # 0x10 (unused)
    '\x11'  # 0x11 cursor down
    '\x12'  # 0x12 RVS ON
    '\x13'  # 0x13 HOME
    '\x14'  # 0x14 DEL
    '\x15'  # 0x15 (unused)
    '\x16'  # 0x16 (unused)
    '\x17'  # 0x17 (unused)
    '\x18'  # 0x18 (unused)
    '\x19'  # 0x19 (unused)
    '\x1a'  # 0x1A (unused)
    '\x1b'  # 0x1B ESC
    '\x1c'  # 0x1C RED
    '\x1d'  # 0x1D cursor right
    '\x1e'  # 0x1E GRN
    '\x1f'  # 0x1F BLU
    # 0x20-0x3F: ASCII compatible
    ' '  # 0x20 SPACE
    '!'  # 0x21
    '"'  # 0x22
    '#'  # 0x23
    '$'  # 0x24
    '%'  # 0x25
    '&'  # 0x26
    "'"  # 0x27
    '('  # 0x28
    ')'  # 0x29
    '*'  # 0x2A
    '+'  # 0x2B
    ','  # 0x2C
    '-'  # 0x2D
    '.'  # 0x2E
    '/'  # 0x2F
    '0'  # 0x30
    '1'  # 0x31
    '2'  # 0x32
    '3'  # 0x33
    '4'  # 0x34
    '5'  # 0x35
    '6'  # 0x36
    '7'  # 0x37
    '8'  # 0x38
    '9'  # 0x39
    ':'  # 0x3A
    ';'  # 0x3B
    '<'  # 0x3C
    '='  # 0x3D
    '>'  # 0x3E
    '?'  # 0x3F
    # 0x40-0x5F: Letters and symbols
    '@'  # 0x40
    'a'  # 0x41 lowercase (PETSCII shifted mode)
    'b'  # 0x42
    'c'  # 0x43
    'd'  # 0x44
    'e'  # 0x45
    'f'  # 0x46
    'g'  # 0x47
    'h'  # 0x48
    'i'  # 0x49
    'j'  # 0x4A
    'k'  # 0x4B
    'l'  # 0x4C
    'm'  # 0x4D
    'n'  # 0x4E
    'o'  # 0x4F
    'p'  # 0x50
    'q'  # 0x51
    'r'  # 0x52
    's'  # 0x53
    't'  # 0x54
    'u'  # 0x55
    'v'  # 0x56
    'w'  # 0x57
    'x'  # 0x58
    'y'  # 0x59
    'z'  # 0x5A
    '['  # 0x5B
    '\u00a3'  # 0x5C POUND SIGN
    ']'  # 0x5D
    '\u2191'  # 0x5E UP ARROW
    '\u2190'  # 0x5F LEFT ARROW
    # 0x60-0x7F: Graphics characters (shifted mode)
    '\u2500'  # 0x60 HORIZONTAL LINE
    '\u2660'  # 0x61 BLACK SPADE SUIT
    '\u2502'  # 0x62 VERTICAL LINE
    '\u2500'  # 0x63 HORIZONTAL LINE
    '\u2597'  # 0x64 QUADRANT LOWER RIGHT
    '\u2596'  # 0x65 QUADRANT LOWER LEFT
    '\u2598'  # 0x66 QUADRANT UPPER LEFT
    '\u259d'  # 0x67 QUADRANT UPPER RIGHT
    '\u2599'  # 0x68 QUADRANT UPPER LEFT AND LOWER LEFT AND LOWER RIGHT
    '\u259f'  # 0x69 QUADRANT UPPER RIGHT AND LOWER LEFT AND LOWER RIGHT
    '\u259e'  # 0x6A QUADRANT UPPER RIGHT AND LOWER LEFT
    '\u2595'  # 0x6B RIGHT ONE EIGHTH BLOCK
    '\u258f'  # 0x6C LEFT ONE EIGHTH BLOCK
    '\u2584'  # 0x6D LOWER HALF BLOCK
    '\u2580'  # 0x6E UPPER HALF BLOCK
    '\u2588'  # 0x6F FULL BLOCK
    '\u2584'  # 0x70 LOWER HALF BLOCK (variant)
    '\u259b'  # 0x71 QUADRANT UPPER LEFT AND UPPER RIGHT AND LOWER LEFT
    '\u2583'  # 0x72 LOWER THREE EIGHTHS BLOCK
    '\u2665'  # 0x73 BLACK HEART SUIT
    '\u259c'  # 0x74 QUADRANT UPPER LEFT AND UPPER RIGHT AND LOWER RIGHT
    '\u256d'  # 0x75 BOX DRAWINGS LIGHT ARC DOWN AND RIGHT
    '\u2573'  # 0x76 BOX DRAWINGS LIGHT DIAGONAL CROSS
    '\u25cb'  # 0x77 WHITE CIRCLE
    '\u2663'  # 0x78 BLACK CLUB SUIT
    '\u259a'  # 0x79 QUADRANT UPPER LEFT AND LOWER RIGHT
    '\u2666'  # 0x7A BLACK DIAMOND SUIT
    '\u253c'  # 0x7B BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    '\u2502'  # 0x7C VERTICAL LINE (with serif, approx)
    '\u2571'  # 0x7D BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT
    '\u03c0'  # 0x7E GREEK SMALL LETTER PI
    '\u25e5'  # 0x7F BLACK UPPER RIGHT TRIANGLE
    # 0x80-0x9F: Control codes (colors, function keys, cursor)
    '\x80'  # 0x80 (unused)
    '\x81'  # 0x81 ORN (orange)
    '\x82'  # 0x82 (unused)
    '\x83'  # 0x83 (unused)
    '\x84'  # 0x84 (unused)
    '\x85'  # 0x85 F1
    '\x86'  # 0x86 F3
    '\x87'  # 0x87 F5
    '\x88'  # 0x88 F7
    '\x89'  # 0x89 F2
    '\x8a'  # 0x8A F4
    '\x8b'  # 0x8B F6
    '\x8c'  # 0x8C F8
    '\r'  # 0x8D SHIFT-RETURN
    '\x8e'  # 0x8E uppercase charset
    '\x8f'  # 0x8F (unused)
    '\x90'  # 0x90 BLK (black)
    '\x91'  # 0x91 cursor up
    '\x92'  # 0x92 RVS OFF
    '\x93'  # 0x93 CLR (clear screen)
    '\x94'  # 0x94 INS (insert)
    '\x95'  # 0x95 BRN (brown)
    '\x96'  # 0x96 LRD (light red)
    '\x97'  # 0x97 GR1 (dark grey)
    '\x98'  # 0x98 GR2 (medium grey)
    '\x99'  # 0x99 LGR (light green)
    '\x9a'  # 0x9A LBL (light blue)
    '\x9b'  # 0x9B GR3 (light grey)
    '\x9c'  # 0x9C PUR (purple)
    '\x9d'  # 0x9D cursor left
    '\x9e'  # 0x9E YEL (yellow)
    '\x9f'  # 0x9F CYN (cyan)
    # 0xA0-0xBF: Shifted graphics
    '\xa0'  # 0xA0 SHIFTED SPACE (non-breaking)
    '\u2584'  # 0xA1 LOWER HALF BLOCK
    '\u2580'  # 0xA2 UPPER HALF BLOCK
    '\u2500'  # 0xA3 HORIZONTAL LINE
    '\u2500'  # 0xA4 HORIZONTAL LINE (lower)
    '\u2500'  # 0xA5 HORIZONTAL LINE (upper)
    '\u2502'  # 0xA6 VERTICAL LINE (right shifted)
    '\u2502'  # 0xA7 VERTICAL LINE (left shifted)
    '\u2502'  # 0xA8 VERTICAL LINE
    '\u256e'  # 0xA9 BOX DRAWINGS LIGHT ARC DOWN AND LEFT
    '\u2570'  # 0xAA BOX DRAWINGS LIGHT ARC UP AND RIGHT
    '\u256f'  # 0xAB BOX DRAWINGS LIGHT ARC UP AND LEFT
    '\u2572'  # 0xAC BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT
    '\u2571'  # 0xAD BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT
    '\u2573'  # 0xAE BOX DRAWINGS LIGHT DIAGONAL CROSS (small)
    '\u2022'  # 0xAF BULLET
    '\u25e4'  # 0xB0 BLACK UPPER LEFT TRIANGLE
    '\u258c'  # 0xB1 LEFT HALF BLOCK
    '\u2597'  # 0xB2 QUADRANT LOWER RIGHT
    '\u2514'  # 0xB3 BOX DRAWINGS LIGHT UP AND RIGHT
    '\u2510'  # 0xB4 BOX DRAWINGS LIGHT DOWN AND LEFT
    '\u2582'  # 0xB5 LOWER ONE QUARTER BLOCK
    '\u250c'  # 0xB6 BOX DRAWINGS LIGHT DOWN AND RIGHT
    '\u2534'  # 0xB7 BOX DRAWINGS LIGHT UP AND HORIZONTAL
    '\u252c'  # 0xB8 BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    '\u2524'  # 0xB9 BOX DRAWINGS LIGHT VERTICAL AND LEFT
    '\u251c'  # 0xBA BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    '\u2586'  # 0xBB LOWER THREE QUARTERS BLOCK
    '\u2585'  # 0xBC LOWER FIVE EIGHTHS BLOCK
    '\u2590'  # 0xBD RIGHT HALF BLOCK
    '\u2588'  # 0xBE FULL BLOCK (variant)
    '\u2572'  # 0xBF DIAGONAL (variant)
    # 0xC0-0xDF: Horizontal line + uppercase A-Z + graphics
    '\u2500'  # 0xC0 HORIZONTAL LINE (same as 0x60)
    'A'  # 0xC1 LATIN CAPITAL LETTER A
    'B'  # 0xC2 LATIN CAPITAL LETTER B
    'C'  # 0xC3 LATIN CAPITAL LETTER C
    'D'  # 0xC4 LATIN CAPITAL LETTER D
    'E'  # 0xC5 LATIN CAPITAL LETTER E
    'F'  # 0xC6 LATIN CAPITAL LETTER F
    'G'  # 0xC7 LATIN CAPITAL LETTER G
    'H'  # 0xC8 LATIN CAPITAL LETTER H
    'I'  # 0xC9 LATIN CAPITAL LETTER I
    'J'  # 0xCA LATIN CAPITAL LETTER J
    'K'  # 0xCB LATIN CAPITAL LETTER K
    'L'  # 0xCC LATIN CAPITAL LETTER L
    'M'  # 0xCD LATIN CAPITAL LETTER M
    'N'  # 0xCE LATIN CAPITAL LETTER N
    'O'  # 0xCF LATIN CAPITAL LETTER O
    'P'  # 0xD0 LATIN CAPITAL LETTER P
    'Q'  # 0xD1 LATIN CAPITAL LETTER Q
    'R'  # 0xD2 LATIN CAPITAL LETTER R
    'S'  # 0xD3 LATIN CAPITAL LETTER S
    'T'  # 0xD4 LATIN CAPITAL LETTER T
    'U'  # 0xD5 LATIN CAPITAL LETTER U
    'V'  # 0xD6 LATIN CAPITAL LETTER V
    'W'  # 0xD7 LATIN CAPITAL LETTER W
    'X'  # 0xD8 LATIN CAPITAL LETTER X
    'Y'  # 0xD9 LATIN CAPITAL LETTER Y
    'Z'  # 0xDA LATIN CAPITAL LETTER Z
    '\u253c'  # 0xDB BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
    '\u2502'  # 0xDC VERTICAL LINE (with tick)
    '\u2571'  # 0xDD DIAGONAL
    '\u03c0'  # 0xDE GREEK SMALL LETTER PI
    '\u25e5'  # 0xDF BLACK UPPER RIGHT TRIANGLE
    # 0xE0-0xFE: Graphics (same as 0xA0-0xBE)
    '\xa0'  # 0xE0 SHIFTED SPACE
    '\u2584'  # 0xE1 LOWER HALF BLOCK
    '\u2580'  # 0xE2 UPPER HALF BLOCK
    '\u2500'  # 0xE3 HORIZONTAL LINE
    '\u2500'  # 0xE4 HORIZONTAL LINE (lower)
    '\u2500'  # 0xE5 HORIZONTAL LINE (upper)
    '\u2502'  # 0xE6 VERTICAL LINE (right shifted)
    '\u2502'  # 0xE7 VERTICAL LINE (left shifted)
    '\u2502'  # 0xE8 VERTICAL LINE
    '\u256e'  # 0xE9 BOX DRAWINGS LIGHT ARC DOWN AND LEFT
    '\u2570'  # 0xEA BOX DRAWINGS LIGHT ARC UP AND RIGHT
    '\u256f'  # 0xEB BOX DRAWINGS LIGHT ARC UP AND LEFT
    '\u2572'  # 0xEC DIAGONAL
    '\u2571'  # 0xED DIAGONAL
    '\u2573'  # 0xEE BOX DRAWINGS LIGHT DIAGONAL CROSS
    '\u2022'  # 0xEF BULLET
    '\u25e4'  # 0xF0 BLACK UPPER LEFT TRIANGLE
    '\u258c'  # 0xF1 LEFT HALF BLOCK
    '\u2597'  # 0xF2 QUADRANT LOWER RIGHT
    '\u2514'  # 0xF3 BOX DRAWINGS LIGHT UP AND RIGHT
    '\u2510'  # 0xF4 BOX DRAWINGS LIGHT DOWN AND LEFT
    '\u2582'  # 0xF5 LOWER ONE QUARTER BLOCK
    '\u250c'  # 0xF6 BOX DRAWINGS LIGHT DOWN AND RIGHT
    '\u2534'  # 0xF7 BOX DRAWINGS LIGHT UP AND HORIZONTAL
    '\u252c'  # 0xF8 BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
    '\u2524'  # 0xF9 BOX DRAWINGS LIGHT VERTICAL AND LEFT
    '\u251c'  # 0xFA BOX DRAWINGS LIGHT VERTICAL AND RIGHT
    '\u2586'  # 0xFB LOWER THREE QUARTERS BLOCK
    '\u2585'  # 0xFC LOWER FIVE EIGHTHS BLOCK
    '\u2590'  # 0xFD RIGHT HALF BLOCK
    '\u2588'  # 0xFE FULL BLOCK
    '\u03c0'  # 0xFF PI
)

assert len(DECODING_TABLE) == 256


def _build_encoding_table(decoding_table):
    """
    Return an encoding map in which the lowest byte wins for duplicates.

    :param str decoding_table: 256-character decoding table, indexed by byte.
    :returns: dict mapping Unicode ordinal to byte value, usable as the
        mapping argument of :func:`codecs.charmap_encode`.
    :rtype: dict
    """
    encoding_table = {}
    for byte, char in enumerate(decoding_table):
        # setdefault() keeps the first (lowest) byte seen for each character.
        encoding_table.setdefault(ord(char), byte)
    return encoding_table


# Encoding table.
#
# codecs.charmap_build() is not used here: for a character that appears more
# than once in the decoding table it keeps the *last* byte, so CR would be
# sent as 0x8D (SHIFT-RETURN) instead of 0x0D (RETURN).  Preferring the lowest
# byte keeps CR at 0x0D and selects the primary copy of the graphics ranges
# that the decoding table marks as duplicates.
ENCODING_TABLE = _build_encoding_table(DECODING_TABLE)


class Codec(codecs.Codec):
    """PETSCII character map codec."""

    def encode(self, input, errors='strict'):  # pylint: disable=redefined-builtin
        """
        Encode input string using PETSCII character map.

        :returns: tuple of (encoded bytes, number of characters consumed).
        """
        return codecs.charmap_encode(input, errors, ENCODING_TABLE)

    def decode(self, input, errors='strict'):  # pylint: disable=redefined-builtin
        """
        Decode input bytes using PETSCII character map.

        :returns: tuple of (decoded string, number of bytes consumed).
        """
        return codecs.charmap_decode(input, errors, DECODING_TABLE)


class IncrementalEncoder(codecs.IncrementalEncoder):
    """PETSCII incremental encoder."""

    def encode(self, input, final=False):  # pylint: disable=redefined-builtin
        """Encode input string incrementally, returning only the bytes."""
        return codecs.charmap_encode(input, self.errors, ENCODING_TABLE)[0]


class IncrementalDecoder(codecs.IncrementalDecoder):
    """PETSCII incremental decoder."""

    def decode(self, input, final=False):  # pylint: disable=redefined-builtin
        """Decode input bytes incrementally, returning only the string."""
        return codecs.charmap_decode(input, self.errors, DECODING_TABLE)[0]


class StreamWriter(Codec, codecs.StreamWriter):
    """PETSCII stream writer."""


class StreamReader(Codec, codecs.StreamReader):
    """PETSCII stream reader."""


def getregentry():
    """Return the codec registry entry."""
    return codecs.CodecInfo(
        name='petscii',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )


def getaliases():
    """Return codec aliases."""
    return ('cbm', 'commodore', 'c64', 'c128')
class _BytesSafeEncoder(json.JSONEncoder):
    """JSON encoder that renders ``bytes`` as UTF-8 text, or hex if undecodable."""

    def default(self, o: Any) -> Any:
        """Return a JSON-serializable stand-in for *o*."""
        # Anything that is not bytes is left to the base class, which raises
        # TypeError for unsupported types.
        if not isinstance(o, bytes):
            return super().default(o)
        try:
            return o.decode("utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: fall back to a hexadecimal string.
            return o.hex()


def _atomic_json_write(filepath: str, data: dict[str, Any]) -> None:
    """
    Write *data* as JSON to *filepath* through a staging file and a rename.

    The staging file is *filepath* with its extension replaced by
    ``.json.new``; it is moved over *filepath* with :func:`os.replace` once
    the JSON has been written.

    :param filepath: Destination path of the JSON document.
    :param data: Mapping to serialize; ``bytes`` values are converted by
        :class:`_BytesSafeEncoder`.
    """
    stem, _extension = os.path.splitext(filepath)
    staging_path = stem + ".json.new"
    with open(staging_path, "w", encoding="utf-8") as staging_file:
        json.dump(data, staging_file, indent=2, sort_keys=True, cls=_BytesSafeEncoder)
    os.replace(staging_path, filepath)