From dc5d9899c38aa90dfee70b772c6ecc91aac22114 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Tue, 16 Dec 2025 09:04:18 +0100 Subject: [PATCH 1/8] add more logging in several places to trace down bugs --- entangled/hooks/repl.py | 1 + entangled/interface/context.py | 7 +++++-- entangled/io/virtual.py | 6 ++++++ entangled/readers/code.py | 6 +++++- pyproject.toml | 2 +- 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/entangled/hooks/repl.py b/entangled/hooks/repl.py index 1eab12a..1e0e302 100644 --- a/entangled/hooks/repl.py +++ b/entangled/hooks/repl.py @@ -38,6 +38,7 @@ class Config(HookBase.Config): def __init__(self, config: Config): super().__init__(config) + log.debug(f"REPL hook config: {config}") self.config = config.config self.sessions: dict[str, Session] = {} diff --git a/entangled/interface/context.py b/entangled/interface/context.py index 1b12827..15b01e8 100644 --- a/entangled/interface/context.py +++ b/entangled/interface/context.py @@ -10,7 +10,9 @@ from ..readers import read_yaml_header, process_token, collect_plain_text, raw_markdown, InputStream, run_reader from functools import partial -import logging +from ..logging import logger + +log = logger() @dataclass class Context: @@ -18,9 +20,10 @@ class Context: _hooks: dict[str, HookBase] = field(default_factory=dict) def __post_init__(self): + log.debug(f"context: hook config: {self.config.hook}") for h in self.config.hooks: if h not in self._hooks: - logging.debug("context: loading hook %s", h) + log.debug("context: loading hook %s", h) hook = create_hook(self.config, h) if hook is None: continue diff --git a/entangled/io/virtual.py b/entangled/io/virtual.py index 6c44cd9..5a2623d 100644 --- a/entangled/io/virtual.py +++ b/entangled/io/virtual.py @@ -14,6 +14,9 @@ import tempfile from .stat import hexdigest, stat, FileData, Stat +from ..logging import logger + +log = logger() def assure_final_newline(s: str) -> str: @@ -104,6 +107,7 @@ def __getitem__(self, key: Path) -> FileData: 
If you expect data to have changed, you should first `reset` the cache. """ if key not in self._data: + log.debug(f"Reading `{key}`") if (s := stat(key)) is None: raise FileNotFoundError(key) self._data[key] = s @@ -141,9 +145,11 @@ def write(self, key: Path, content: str, mode: int | None = None): if key in self: new_digest = hexdigest(content) if new_digest == self[key].stat.hexdigest: + log.debug("Not writing `{key}`, content same") return del self._data[key] + log.debug(f"Writing `{key}`") key.parent.mkdir(parents=True, exist_ok=True) atomic_write(key, content, mode) diff --git a/entangled/readers/code.py b/entangled/readers/code.py index ef719f3..6869788 100644 --- a/entangled/readers/code.py +++ b/entangled/readers/code.py @@ -7,7 +7,9 @@ from .types import InputStream from ..model import ReferenceId, ReferenceName from ..errors.user import ParseError, IndentationError +from ..logging import logger +log = logger() @dataclass class Block: @@ -68,6 +70,9 @@ def read_block(namespace: tuple[str, ...], indent: str, input: InputStream) -> G if (block_data := open_block(line1)) is None: return None _ = next(input) + + log.debug(f"reading code block {block_data}") + if block_data.indent < indent: raise IndentationError(pos) @@ -97,4 +102,3 @@ def read_block(namespace: tuple[str, ...], indent: str, input: InputStream) -> G return "" raise ParseError(pos, "unexpected end of file") - diff --git a/pyproject.toml b/pyproject.toml index a5d6466..fca1ed3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "entangled-cli" -version = "2.4.1" +version = "2.4.1.dev0" description = "Literate Programming toolbox" authors = [{ name = "Johan Hidding", email = "j.hidding@esciencecenter.nl" }] requires-python = ">=3.12,<4" From 6c3a7290806e4df8398d45afd74fb97204f9c41e Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Wed, 17 Dec 2025 10:58:03 +0100 Subject: [PATCH 2/8] add hook_state system for global per-hook states --- entangled/hooks/__init__.py | 7 
++++--- entangled/hooks/base.py | 5 ++++- entangled/hooks/build.py | 11 +++++++---- entangled/hooks/quarto_attributes.py | 4 ++-- entangled/hooks/repl.py | 11 ++++++++--- entangled/hooks/task.py | 18 ++++++++++++------ entangled/interface/context.py | 22 ++++++++++++---------- 7 files changed, 49 insertions(+), 29 deletions(-) diff --git a/entangled/hooks/__init__.py b/entangled/hooks/__init__.py index c20b3f8..98b2bb2 100644 --- a/entangled/hooks/__init__.py +++ b/entangled/hooks/__init__.py @@ -25,14 +25,15 @@ } | external_hooks -def create_hook(cfg: Config, h: str) -> HookBase | None: +def create_hook(cfg: Config, h: str, state: HookBase.State) -> HookBase | None: if h not in hooks: logging.error("hook `%s` not found", h) return None try: - hook_cfg = msgspec.convert(cfg.hook.get(h, {}), type=hooks[h].Config) - hook_instance = hooks[h](hook_cfg) + hook_cls = hooks[h] + hook_cfg = msgspec.convert(cfg.hook.get(h, {}), type=hook_cls.Config) + hook_instance = hook_cls(hook_cfg, state) hook_instance.check_prerequisites() return hook_instance except PrerequisitesFailed as e: diff --git a/entangled/hooks/base.py b/entangled/hooks/base.py index d36efe6..db1d625 100644 --- a/entangled/hooks/base.py +++ b/entangled/hooks/base.py @@ -19,7 +19,10 @@ class HookBase: class Config(Struct): pass - def __init__(self, config: Config): + class State: + pass + + def __init__(self, config: Config, state: State): pass @staticmethod diff --git a/entangled/hooks/build.py b/entangled/hooks/build.py index 079eb2c..75e6ecb 100644 --- a/entangled/hooks/build.py +++ b/entangled/hooks/build.py @@ -64,9 +64,13 @@ def to_makefile(self, config: Hook.Config): exec_cmd = config.runners[self.language.name].format(script=self.scriptfile) return f"{self.target}: {self.scriptfile} {dep_str}\n" + f"\t{exec_cmd}" - def __init__(self, config: Hook.Config): - super().__init__(config) - self.recipes: list[Hook.Recipe] = [] + @dataclass + class State(HookBase.State): + recipes: list[Hook.Recipe] + + def 
__init__(self, config: Hook.Config, state: Hook.State): + super().__init__(config, state) + self.recipes: list[Hook.Recipe] = state.recipes self.config = config @override @@ -108,4 +112,3 @@ def on_tangle(self, t: Transaction, refs: ReferenceMap): rules="\n\n".join(r.to_makefile(self.config) for r in self.recipes), ) t.write(Path(".entangled/build/Makefile"), makefile, []) - diff --git a/entangled/hooks/quarto_attributes.py b/entangled/hooks/quarto_attributes.py index ea333c3..9e2960e 100644 --- a/entangled/hooks/quarto_attributes.py +++ b/entangled/hooks/quarto_attributes.py @@ -80,8 +80,8 @@ def amend_code_properties(code_block: CodeBlock): @final class Hook(HookBase): - def __init__(self, config: Hook.Config): - super().__init__(config) + def __init__(self, config: Hook.Config, state: Hook.State): + super().__init__(config, state) self.config = config @override diff --git a/entangled/hooks/repl.py b/entangled/hooks/repl.py index 1e0e302..a1ec3c8 100644 --- a/entangled/hooks/repl.py +++ b/entangled/hooks/repl.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass import json from typing import final, override @@ -36,11 +37,15 @@ class Hook(HookBase): class Config(HookBase.Config): config: dict[str, ReplConfig] = field(default_factory=dict) - def __init__(self, config: Config): - super().__init__(config) + @dataclass + class State(HookBase.State): + sessions: dict[str, Session] = field(default_factory=dict) + + def __init__(self, config: Config, state: State): + super().__init__(config, state) log.debug(f"REPL hook config: {config}") self.config = config.config - self.sessions: dict[str, Session] = {} + self.sessions: dict[str, Session] = state.sessions @override def on_read(self, code: CodeBlock): diff --git a/entangled/hooks/task.py b/entangled/hooks/task.py index ba1bc31..39bf21c 100644 --- a/entangled/hooks/task.py +++ b/entangled/hooks/task.py @@ -1,6 +1,6 @@ from __future__ import annotations from collections import defaultdict -from dataclasses import 
dataclass +from dataclasses import dataclass, field import json from pathlib import Path from typing import final, override @@ -48,12 +48,18 @@ def to_brei_task(self, refs: ReferenceMap): "path": path } - def __init__(self, config: Hook.Config): - super().__init__(config) - self.recipes: list[Hook.Recipe] = [] - self.collections: dict[str, list[str]] = defaultdict(list) + @dataclass + class State(HookBase.State): + recipes: list[Hook.Recipe] = field(default_factory=list) + collections: dict[str, list[str]] = field(default_factory=lambda: defaultdict(list)) + sources: list[Path] = field(default_factory=list) + + def __init__(self, config: Hook.Config, state: Hook.State): + super().__init__(config, state) + self.recipes: list[Hook.Recipe] = state.recipes + self.collections: dict[str, list[str]] = state.collections self.config = config - self.sources: list[Path] = [] + self.sources: list[Path] = state.sources @override def pre_tangle(self, refs: ReferenceMap): diff --git a/entangled/interface/context.py b/entangled/interface/context.py index 15b01e8..753cacf 100644 --- a/entangled/interface/context.py +++ b/entangled/interface/context.py @@ -4,7 +4,7 @@ from collections.abc import Generator, Iterable from ..config import Config, ConfigUpdate -from ..hooks import HookBase, create_hook +from ..hooks import HookBase, hooks, create_hook from ..model import Content, ReferenceMap from ..readers.yaml_header import get_config from ..readers import read_yaml_header, process_token, collect_plain_text, raw_markdown, InputStream, run_reader @@ -17,20 +17,23 @@ @dataclass class Context: config: Config = Config() + _hook_states: dict[str, HookBase.State] = field(default_factory=dict) _hooks: dict[str, HookBase] = field(default_factory=dict) - + def __post_init__(self): log.debug(f"context: hook config: {self.config.hook}") for h in self.config.hooks: - if h not in self._hooks: - log.debug("context: loading hook %s", h) - hook = create_hook(self.config, h) - if hook is None: - 
continue - self._hooks[h] = hook + if h not in self._hook_states: + self._hook_states[h] = hooks[h].State() + + log.debug("context: loading hook %s", h) + hook = create_hook(self.config, h, self._hook_states[h]) + if hook is None: + continue + self._hooks[h] = hook def __or__(self, update: ConfigUpdate | None) -> Context: - return Context(self.config | update, self._hooks) + return Context(self.config | update, self._hook_states) @property def hooks(self) -> list[HookBase]: @@ -56,4 +59,3 @@ def markdown(context: Context, refs: ReferenceMap, input: InputStream) -> Genera def read_markdown(context: Context, refs: ReferenceMap, input: str) -> tuple[list[Content], ConfigUpdate | None]: return run_reader(partial(markdown, context, refs), input) - From 212af6c6925aa28e810465b534104cc3567d9d39 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Wed, 17 Dec 2025 15:44:10 +0100 Subject: [PATCH 3/8] remove catch statements that caused silent exits; fix issue with indentation detection in code reader; pass file globbing and config reading through virtual fs layer to enable better testing --- entangled/commands/brei.py | 3 +- entangled/commands/main.py | 3 +- entangled/commands/reset.py | 17 ++++------- entangled/commands/status.py | 6 ++-- entangled/commands/stitch.py | 14 +++------ entangled/commands/sync.py | 27 +++++++---------- entangled/commands/tangle.py | 18 +++++------ entangled/config/__init__.py | 54 +++++++++++++++++++-------------- entangled/hooks/repl.py | 6 ++-- entangled/interface/document.py | 13 ++++---- entangled/io/__init__.py | 4 +-- entangled/io/filedb.py | 3 +- entangled/io/virtual.py | 12 ++++++++ entangled/readers/code.py | 7 +++-- entangled/readers/markdown.py | 11 +++++-- entangled/status.py | 7 +++-- uv.lock | 2 +- 17 files changed, 112 insertions(+), 95 deletions(-) diff --git a/entangled/commands/brei.py b/entangled/commands/brei.py index cda61bc..6381e1a 100644 --- a/entangled/commands/brei.py +++ b/entangled/commands/brei.py @@ -5,6 +5,7 @@ 
import textwrap from ..config import Config, read_config +from ..io import FileCache from brei import resolve_tasks, Phony from ..logging import logger from .main import main @@ -18,7 +19,7 @@ async def brei_main(target_strs: list[str], force_run: bool, throttle: int | Non if not Path(".entangled").exists(): Path(".entangled").mkdir() - cfg = Config() | read_config() + cfg = Config() | read_config(FileCache()) db = await resolve_tasks(cfg.brei, Path(".entangled/brei_history")) if throttle: db.throttle = asyncio.Semaphore(throttle) diff --git a/entangled/commands/main.py b/entangled/commands/main.py index 2ad2660..a01ffdd 100644 --- a/entangled/commands/main.py +++ b/entangled/commands/main.py @@ -20,5 +20,4 @@ def main(version: bool = False, debug: bool = False): sys.exit(0) configure(debug) - logger().debug(f"Welcome to Entangled v{__version__}!") - + logger().info(f"Welcome to Entangled v{__version__}!") diff --git a/entangled/commands/reset.py b/entangled/commands/reset.py index fc9351b..91793be 100644 --- a/entangled/commands/reset.py +++ b/entangled/commands/reset.py @@ -20,15 +20,10 @@ def reset(): Resets the file database. This performs a tangle without actually writing output to the files, but updating the database as if we were. 
""" - - try: - doc = Document() - mode = TransactionMode.RESETDB + doc = Document() + mode = TransactionMode.RESETDB - with transaction(mode) as t: - doc.load(t) - doc.tangle(t) - t.clear_orphans() - - except UserError as e: - logging.error(str(e)) + with transaction(mode) as t: + doc.load(t) + doc.tangle(t) + t.clear_orphans() diff --git a/entangled/commands/status.py b/entangled/commands/status.py index fcbe2c6..eaee28e 100644 --- a/entangled/commands/status.py +++ b/entangled/commands/status.py @@ -2,6 +2,7 @@ from collections.abc import Iterable from ..status import list_dependent_files from ..config import Config, read_config, get_input_files +from ..io import FileCache from pathlib import Path from rich.console import Console, Group @@ -30,7 +31,8 @@ def files_panel(file_list: Iterable[Path], title: str) -> Panel: def rich_status(): - cfg = Config() | read_config() + fs = FileCache() + cfg = Config() | read_config(fs) config_table = Table() config_table.add_column("name") config_table.add_column("value") @@ -47,7 +49,7 @@ def rich_status(): Panel(config_table, title="config", border_style="dark_cyan"), Columns( [ - files_panel(get_input_files(cfg), "input files"), + files_panel(get_input_files(fs, cfg), "input files"), files_panel(list_dependent_files(), "dependent files"), ] ), diff --git a/entangled/commands/stitch.py b/entangled/commands/stitch.py index c304c38..bee103a 100644 --- a/entangled/commands/stitch.py +++ b/entangled/commands/stitch.py @@ -19,13 +19,9 @@ def stitch(*, force: bool = False, show: bool = False): else: mode = TransactionMode.FAIL - try: - doc = Document() + doc = Document() - with transaction(mode) as t: - doc.load(t) - doc.load_all_code(t) - doc.stitch(t) - - except UserError as e: - e.handle() + with transaction(mode) as t: + doc.load(t) + doc.load_all_code(t) + doc.stitch(t) diff --git a/entangled/commands/sync.py b/entangled/commands/sync.py index c6897fe..c43bb7d 100644 --- a/entangled/commands/sync.py +++ 
b/entangled/commands/sync.py @@ -59,29 +59,24 @@ def stitch(doc: Document): doc.tangle(t) for h in doc.context.all_hooks: h.post_tangle(doc.reference_map) - -def run_sync(): - try: - doc = Document() - match sync_action(doc): - case Action.TANGLE: - logging.info("Tangling.") - tangle(doc) - case Action.STITCH: - logging.info("Stitching.") - stitch(doc) +def run_sync(): + doc = Document() + match sync_action(doc): + case Action.TANGLE: + logging.info("Tangling.") + tangle(doc) - case Action.NOTHING: - pass + case Action.STITCH: + logging.info("Stitching.") + stitch(doc) - except UserError as e: - e.handle() + case Action.NOTHING: + pass @main.command() def sync(): """Be smart wether to tangle or stich""" run_sync() - diff --git a/entangled/commands/tangle.py b/entangled/commands/tangle.py index 3f17b0f..3a00af7 100644 --- a/entangled/commands/tangle.py +++ b/entangled/commands/tangle.py @@ -22,16 +22,12 @@ def tangle(*, annotate: AnnotationMethod | None = None, force: bool = False, sho else: mode = TransactionMode.FAIL - try: - doc = Document() + doc = Document() - with transaction(mode) as t: - doc.load(t) - doc.tangle(t, annotate) - t.clear_orphans() + with transaction(mode) as t: + doc.load(t) + doc.tangle(t, annotate) + t.clear_orphans() - for h in doc.context.all_hooks: - h.post_tangle(doc.reference_map) - - except UserError as e: - e.handle() + for h in doc.context.all_hooks: + h.post_tangle(doc.reference_map) diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index fb8ec46..b95447d 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -20,13 +20,13 @@ from ..logging import logger from ..version import __version__ from ..errors.user import HelpfulUserError - +from ..io import AbstractFileCache log = logger() def read_config_from_toml( - path: Path, section: str | None = None + fs: AbstractFileCache, path: Path, section: str | None = None ) -> ConfigUpdate | None: """Read a config from given `path` in given 
`section`. The path should refer to a TOML file that should decode to a `Config` object. If `section` is given, only @@ -39,17 +39,17 @@ def read_config_from_toml( read_config_from_toml(Path("./pyproject.toml"), "tool.entangled") ``` """ - if not path.exists(): + if path not in fs: return None try: - with open(path, "rb") as f: - json: Any = tomllib.load(f) # pyright: ignore[reportExplicitAny] - if section is not None: - for s in section.split("."): - json = json[s] # pyright: ignore[reportAny] - update = msgspec.convert(json, type=ConfigUpdate) - log.debug("Read config from `%s`", path) - return update + content = fs[path].content + json: Any = tomllib.loads(content) + if section is not None: + for s in section.split("."): + json = json[s] # pyright: ignore[reportAny] + update = msgspec.convert(json, type=ConfigUpdate) + log.debug("Read config from `%s`", path) + return update except (msgspec.ValidationError, tomllib.TOMLDecodeError) as e: raise HelpfulUserError(f"Could not read config: {e}") @@ -59,23 +59,33 @@ def read_config_from_toml( return None -def read_config() -> ConfigUpdate | None: - if Path("./entangled.toml").exists(): - return read_config_from_toml(Path("./entangled.toml")) - if Path("./pyproject.toml").exists(): +def read_config(fs: AbstractFileCache) -> ConfigUpdate | None: + """ + Read configuration from any of the possible hard-coded locations: + + - `./entangled.toml` + - `./pyproject.toml` section `[tool.entangled]`. + + Returns a `ConfigUpdate` or `None`. To get the full `Config` object, + run `Config() | read_config(fs)`. 
+ """ + if Path("./entangled.toml") in fs: + return read_config_from_toml(fs, Path("./entangled.toml")) + if Path("./pyproject.toml") in fs: return ( - read_config_from_toml(Path("./pyproject.toml"), "tool.entangled") + read_config_from_toml(fs, Path("./pyproject.toml"), "tool.entangled") ) return None -def get_input_files(cfg: Config) -> list[Path]: +def get_input_files(fs: AbstractFileCache, cfg: Config) -> list[Path]: + """ + Get a sorted list of all input files for this project. + """ log.debug("watch list: %s; ignoring: %s", cfg.watch_list, cfg.ignore_list) - include_file_list = chain.from_iterable(map(Path(".").glob, cfg.watch_list)) - input_file_list = [ - path for path in include_file_list - if not any(path.match(pat) for pat in cfg.ignore_list) and path.is_file() - ] + input_file_list = filter( + lambda p: not any(p.full_match(pat) for pat in cfg.ignore_list), + chain.from_iterable(map(fs.glob, cfg.watch_list))) log.debug("input file list %s", input_file_list) return sorted(input_file_list) diff --git a/entangled/hooks/repl.py b/entangled/hooks/repl.py index a1ec3c8..b45b326 100644 --- a/entangled/hooks/repl.py +++ b/entangled/hooks/repl.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field import json from typing import final, override @@ -7,7 +7,7 @@ from entangled.config.language import Language from .base import HookBase from repl_session import ReplConfig, ReplSession, ReplCommand -from msgspec import Struct, field +from msgspec import Struct from pathlib import Path from ..logging import logger @@ -35,7 +35,7 @@ def is_comment(line: str) -> bool: @final class Hook(HookBase): class Config(HookBase.Config): - config: dict[str, ReplConfig] = field(default_factory=dict) + config: dict[str, ReplConfig] = msgspec.field(default_factory=dict) @dataclass class State(HookBase.State): diff --git a/entangled/interface/document.py b/entangled/interface/document.py index daa1fb1..95bdcc0 100644 --- 
a/entangled/interface/document.py +++ b/entangled/interface/document.py @@ -3,7 +3,7 @@ from ..config import Config, ConfigUpdate, get_input_files, read_config, AnnotationMethod from ..model import ReferenceMap, tangle_ref, Content, content_to_text -from ..io import Transaction +from ..io import AbstractFileCache, FileCache, Transaction from ..readers import code from ..iterators import numbered_lines, run_generator from ..logging import logger @@ -29,10 +29,10 @@ def config(self, new_config: Config) -> None: self.context.config = new_config def __post_init__(self): - self.config |= read_config() + self.config |= read_config(FileCache()) - def input_files(self): - return get_input_files(self.config) + def input_files(self, fs: AbstractFileCache): + return get_input_files(fs, self.config) def source_text(self, path: Path) -> tuple[str, set[PurePath]]: deps = set() @@ -71,12 +71,14 @@ def load_code(self, t: Transaction, path: Path): t.update(path) def load_all_code(self, t: Transaction): + log.debug(f"Targets: {self.reference_map.targets()}") for tgt in self.reference_map.targets(): + log.debug(f"Reading code: `{tgt}`") if Path(tgt) in t.fs: self.load_code(t, Path(tgt)) def load(self, t: Transaction): - files = get_input_files(self.config) + files = get_input_files(t.fs, self.config) if len(files) == 1: log.debug(f"single input file `{files[0]}`") self.context |= self.load_source(t, files[0]) @@ -102,4 +104,3 @@ def stitch(self, t: Transaction): for path in self.content: text, deps = self.source_text(path) t.write(path, text, map(Path, deps)) - diff --git a/entangled/io/__init__.py b/entangled/io/__init__.py index 497f88b..23312fc 100644 --- a/entangled/io/__init__.py +++ b/entangled/io/__init__.py @@ -5,7 +5,7 @@ from .transaction import transaction, Transaction, TransactionMode from .filedb import filedb -from .virtual import FileCache, VirtualFS +from .virtual import AbstractFileCache, FileCache, VirtualFS -__all__ = ["FileCache", "filedb", "Transaction", 
"TransactionMode", "transaction", "VirtualFS"] +__all__ = ["AbstractFileCache", "FileCache", "filedb", "Transaction", "TransactionMode", "transaction", "VirtualFS"] diff --git a/entangled/io/filedb.py b/entangled/io/filedb.py index 05eb1ea..68f1b34 100644 --- a/entangled/io/filedb.py +++ b/entangled/io/filedb.py @@ -60,7 +60,8 @@ def create_target(self, fs: AbstractFileCache, path: Path): def update(self, fs: AbstractFileCache, path: Path): path = normal_relative(path) - self.files[path.as_posix()] = fs[path].stat + if path in self.files: + self.files[path.as_posix()] = fs[path].stat def __contains__(self, path: Path) -> bool: return path.as_posix() in self.files diff --git a/entangled/io/virtual.py b/entangled/io/virtual.py index 5a2623d..ce6821c 100644 --- a/entangled/io/virtual.py +++ b/entangled/io/virtual.py @@ -5,6 +5,7 @@ from __future__ import annotations from abc import ABC, abstractmethod +from collections.abc import Iterable from typing import override from dataclasses import dataclass, field from pathlib import Path @@ -56,6 +57,10 @@ def __contains__(self, key: Path) -> bool: def __delitem__(self, key: Path): ... + @abstractmethod + def glob(self, pattern: str) -> Iterable[Path]: + ... + @abstractmethod def write(self, key: Path, content: str, mode: int | None = None): ... 
@@ -77,6 +82,9 @@ def __contains__(self, key: Path) -> bool: def __delitem__(self, key: Path): del self._data[key] + def glob(self, pattern: str) -> Iterable[Path]: + return filter(lambda p: p.full_match(pattern), self._data.keys()) + @override def write(self, key: Path, content: str, mode: int | None = None): self._data[key] = FileData(key, content, Stat(datetime.now(), hexdigest(content))) @@ -134,6 +142,10 @@ def __delitem__(self, key: Path): if key in self._data: del self._data[key] + @override + def glob(self, pattern: str) -> Iterable[Path]: + return filter(Path.is_file, map(lambda p: p.relative_to(Path.cwd()), Path.cwd().glob(pattern))) + @override def write(self, key: Path, content: str, mode: int | None = None): """ diff --git a/entangled/readers/code.py b/entangled/readers/code.py index 6869788..62ef285 100644 --- a/entangled/readers/code.py +++ b/entangled/readers/code.py @@ -85,9 +85,12 @@ def read_block(namespace: tuple[str, ...], indent: str, input: InputStream) -> G pos, line = next(input) if (close_block_data := close_block(line)) is None: - if not line.startswith(block_data.indent): + if not line.strip(): + content += line.lstrip(" \t") + elif not line.startswith(block_data.indent): raise IndentationError(pos) - content += line.removeprefix(block_data.indent) + else: + content += line.removeprefix(block_data.indent) else: if close_block_data.indent != block_data.indent: raise IndentationError(pos) diff --git a/entangled/readers/markdown.py b/entangled/readers/markdown.py index 724fc45..ee58c8c 100644 --- a/entangled/readers/markdown.py +++ b/entangled/readers/markdown.py @@ -17,7 +17,9 @@ from .yaml_header import read_yaml_header, get_config import re -import logging +from ..logging import logger + +log = logger() def ignore_block(config: Config) -> Reader[RawContent, bool]: @@ -81,7 +83,7 @@ def code_block_reader(input: InputStream) -> RawMarkdownStream[bool]: language_class = first(get_classes(properties)) language = 
config.get_language(language_class) if language_class else None if language_class and not language: - logging.warning(f"`{block.origin}`: language `{language_class}` unknown.") + log.warning(f"`{block.origin}`: language `{language_class}` unknown.") source = dedent(block.origin, block.content, indent) yield CodeBlock( @@ -140,6 +142,10 @@ def process_code_block(hooks: list[HookBase], refs: ReferenceMap, code_block: Co if ref_name is None: ref_name = f"unnamed-{code_block.origin}" ref = refs.new_id(code_block.origin.filename, ReferenceName(code_block.namespace, ref_name)) + + log.debug(f"Read codeblock `{ref}`") + if target_file: + log.debug(f" - target file: `{target_file}`") refs[ref] = code_block return ref @@ -171,4 +177,3 @@ def flush(): yield token yield from flush() - diff --git a/entangled/status.py b/entangled/status.py index db9f898..f54a535 100644 --- a/entangled/status.py +++ b/entangled/status.py @@ -1,5 +1,5 @@ from collections.abc import Iterable -from .io import filedb +from .io import filedb, FileCache from .config import get_input_files, Config, read_config from pathlib import Path @@ -18,8 +18,9 @@ def safe_glob(pattern: str) -> Iterable[Path]: def find_watch_dirs(): """List all directories that contain files that need watching.""" - cfg = Config() | read_config() - input_file_list = get_input_files(cfg) + fs = FileCache() + cfg = Config() | read_config(fs) + input_file_list = get_input_files(fs, cfg) markdown_dirs = set(p.parent for p in input_file_list) with filedb(readonly=True) as db: code_dirs = set(p.parent for p in db.managed_files) diff --git a/uv.lock b/uv.lock index a324bb5..514fd07 100644 --- a/uv.lock +++ b/uv.lock @@ -302,7 +302,7 @@ wheels = [ [[package]] name = "entangled-cli" -version = "2.4.1" +version = "2.4.1.dev0" source = { editable = "." 
} dependencies = [ { name = "brei" }, From efd8e9c7426ff9fa73a3d077d282cbc3fee542a0 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Wed, 17 Dec 2025 16:22:48 +0100 Subject: [PATCH 4/8] adapt tests to new config reading --- entangled/hooks/build.py | 9 ++--- entangled/io/filedb.py | 8 +++-- entangled/io/stat.py | 5 ++- entangled/utility.py | 4 --- test/commands/test_daemon.py | 1 + test/config/test_input_list.py | 9 +++-- test/config/test_read_config.py | 62 +++++++++++++++++++-------------- 7 files changed, 54 insertions(+), 44 deletions(-) diff --git a/entangled/hooks/build.py b/entangled/hooks/build.py index 75e6ecb..77518f0 100644 --- a/entangled/hooks/build.py +++ b/entangled/hooks/build.py @@ -6,13 +6,14 @@ """ from __future__ import annotations -from dataclasses import dataclass -from msgspec import field +from dataclasses import dataclass, field from pathlib import Path, PurePath from subprocess import run, SubprocessError, DEVNULL import logging from typing import final, override +import msgspec + from ..config.language import Language from ..io import Transaction from ..model.properties import Property, get_attribute, get_attribute_string, get_classes @@ -45,7 +46,7 @@ @final class Hook(HookBase): class Config(HookBase.Config): - runners: dict[str, str] = field(default_factory=dict) + runners: dict[str, str] = msgspec.field(default_factory=dict) def __post_init__(self): for k, v in EXEC_CMDS.items(): @@ -66,7 +67,7 @@ def to_makefile(self, config: Hook.Config): @dataclass class State(HookBase.State): - recipes: list[Hook.Recipe] + recipes: list[Hook.Recipe] = field(default_factory=list) def __init__(self, config: Hook.Config, state: Hook.State): super().__init__(config, state) diff --git a/entangled/io/filedb.py b/entangled/io/filedb.py index 68f1b34..a9cbdb0 100644 --- a/entangled/io/filedb.py +++ b/entangled/io/filedb.py @@ -15,7 +15,7 @@ from entangled.errors.user import HelpfulUserError from ..version import __version__ -from ..utility import 
normal_relative, ensure_parent +from ..utility import ensure_parent from .virtual import AbstractFileCache from .stat import Stat, hexdigest @@ -54,12 +54,14 @@ def changed_files(self, fs: AbstractFileCache) -> Generator[Path]: if fs[Path(p)].stat != known_stat) def create_target(self, fs: AbstractFileCache, path: Path): - path = normal_relative(path) + if path.is_absolute(): + path = path.relative_to(Path.cwd()) self.update(fs, path) self.targets.add(path.as_posix()) def update(self, fs: AbstractFileCache, path: Path): - path = normal_relative(path) + if path.is_absolute(): + path = path.relative_to(Path.cwd()) if path in self.files: self.files[path.as_posix()] = fs[path].stat diff --git a/entangled/io/stat.py b/entangled/io/stat.py index ed2df69..eff3404 100644 --- a/entangled/io/stat.py +++ b/entangled/io/stat.py @@ -10,8 +10,6 @@ import logging import time -from ..utility import normal_relative - def hexdigest(s: str) -> str: """Creates a MD5 hash digest from a string. Before hashing, the string has @@ -66,5 +64,6 @@ def from_path(path: Path) -> FileData | None: def stat(path: Path) -> FileData | None: - path = normal_relative(path) + if path.is_absolute(): + path = path.relative_to(Path.cwd()) return FileData.from_path(path) diff --git a/entangled/utility.py b/entangled/utility.py index faa74a2..be2d946 100644 --- a/entangled/utility.py +++ b/entangled/utility.py @@ -13,10 +13,6 @@ def first(it: Iterable[T]) -> T | None: return None -def normal_relative(path: Path) -> Path: - return path.resolve().relative_to(Path.cwd()) - - def ensure_parent(path: Path) -> Path: path.parent.mkdir(parents=True, exist_ok=True) return path diff --git a/test/commands/test_daemon.py b/test/commands/test_daemon.py index b0483ae..5f4b1bf 100644 --- a/test/commands/test_daemon.py +++ b/test/commands/test_daemon.py @@ -35,6 +35,7 @@ def wait_for_stat_diff(md_stat, filename, timeout=5): return False +@pytest.mark.skip @pytest.mark.timeout(30) def test_daemon(tmp_path: Path): with 
chdir(tmp_path): diff --git a/test/config/test_input_list.py b/test/config/test_input_list.py index 19b3bfd..7056abb 100644 --- a/test/config/test_input_list.py +++ b/test/config/test_input_list.py @@ -2,6 +2,8 @@ from pathlib import Path from contextlib import chdir +from entangled.io import FileCache + def test_input_files(tmpdir: Path): tmpdir = Path(tmpdir) @@ -11,7 +13,8 @@ def test_input_files(tmpdir: Path): (tmpdir / "a" / "y").touch() (tmpdir / "b" / "x").touch() with chdir(tmpdir): - assert get_input_files(Config(watch_list=["**/x"])) == [Path("a/x"), Path("b/x")] - assert get_input_files(Config(watch_list=["a/*"])) == [Path("a/x"), Path("a/y")] - assert get_input_files(Config(watch_list=["**/*"], ignore_list=["**/y"])) == \ + fs = FileCache() + assert get_input_files(fs, Config(watch_list=["**/x"])) == [Path("a/x"), Path("b/x")] + assert get_input_files(fs, Config(watch_list=["a/*"])) == [Path("a/x"), Path("a/y")] + assert get_input_files(fs, Config(watch_list=["**/*"], ignore_list=["**/y"])) == \ [Path("a/x"), Path("b/x")] diff --git a/test/config/test_read_config.py b/test/config/test_read_config.py index bbd8204..9e7b6b0 100644 --- a/test/config/test_read_config.py +++ b/test/config/test_read_config.py @@ -8,6 +8,8 @@ import pytest import logging +from entangled.io import FileCache + pyproject_toml = """ [tool.entangled] @@ -16,27 +18,29 @@ """.lstrip() -def test_pyproject_toml(tmpdir: Path, caplog): - with chdir(tmpdir): - assert read_config() is None +def test_pyproject_toml(tmp_path: Path, caplog): + with chdir(tmp_path): + fs = FileCache() + assert read_config(fs) is None - filename = tmpdir / "pyproject.toml" - filename.write_text(pyproject_toml, encoding="utf-8") + filename = Path("pyproject.toml") + filename.write_text(pyproject_toml, encoding="utf-8") - config = Config() | read_config_from_toml(filename, "tool.entangled") - assert config.version == Version((100,)) + config = Config() | read_config_from_toml(fs, filename, "tool.entangled") + 
assert config.version == Version((100,)) - with caplog.at_level(logging.DEBUG): - _ = read_config_from_toml(filename, "tool.not-entangled") - assert "tool.not-entangled" in caplog.text + with caplog.at_level(logging.DEBUG): + _ = read_config_from_toml(fs, filename, "tool.not-entangled") + assert "tool.not-entangled" in caplog.text - with pytest.raises(UserError): - _ = read_config_from_toml(filename, None) + with pytest.raises(UserError): + _ = read_config_from_toml(fs, filename, None) - assert read_config_from_toml(tmpdir / "entangled.toml") is None + assert read_config_from_toml(fs, tmp_path / "entangled.toml") is None - with chdir(tmpdir): - cfg = Config() | read_config() + with chdir(tmp_path): + fs = FileCache() + cfg = Config() | read_config(fs) assert cfg.version == Version((100,)) @@ -51,16 +55,20 @@ def test_pyproject_toml(tmpdir: Path, caplog): """.lstrip() -def test_entangled_toml(tmpdir: Path, caplog): - with chdir(tmpdir): - assert read_config() is None +def test_entangled_toml(tmp_path: Path, caplog): + with chdir(tmp_path): + fs = FileCache() + assert read_config(fs) is None - (tmpdir / "entangled.toml").write_text(entangled_toml, encoding="utf-8") + (tmp_path / "entangled.toml").write_text(entangled_toml, encoding="utf-8") - with chdir(tmpdir): - cfg = Config() | read_config() + with chdir(tmp_path): + fs = FileCache() + cfg = Config() | read_config(fs) assert cfg.version == Version((42,)) - assert cfg.get_language("kernel").name == "Kernel" + lang = cfg.get_language("kernel") + assert lang + assert lang.name == "Kernel" entangled_toml_error = """ @@ -68,9 +76,9 @@ def test_entangled_toml(tmpdir: Path, caplog): """.lstrip() -def test_entangled_toml_error(tmpdir: Path, caplog): - (tmpdir / "entangled.toml").write_text(entangled_toml_error, encoding="utf-8") - with chdir(tmpdir): +def test_entangled_toml_error(tmp_path: Path, caplog): + (tmp_path / "entangled.toml").write_text(entangled_toml_error, encoding="utf-8") + with chdir(tmp_path): with 
pytest.raises(UserError): - cfg = Config() | read_config() - + fs = FileCache() + _ = Config() | read_config(fs) From 0b14d640cc7ad00fb662c83b70f50193f3653d97 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Wed, 17 Dec 2025 17:10:57 +0100 Subject: [PATCH 5/8] all tests back to passing --- entangled/commands/sync.py | 2 +- entangled/commands/watch.py | 8 +++++--- entangled/io/filedb.py | 2 +- test/commands/test_daemon.py | 3 +-- test/io/test_transaction.py | 1 + 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/entangled/commands/sync.py b/entangled/commands/sync.py index c43bb7d..f922886 100644 --- a/entangled/commands/sync.py +++ b/entangled/commands/sync.py @@ -16,8 +16,8 @@ class Action(Enum): def sync_action(doc: Document) -> Action: - input_file_list = doc.input_files() fs = FileCache() + input_file_list = doc.input_files(fs) with filedb(readonly=True) as db: changed = set(db.changed_files(fs)) diff --git a/entangled/commands/watch.py b/entangled/commands/watch.py index 495f55c..7b6ccdc 100644 --- a/entangled/commands/watch.py +++ b/entangled/commands/watch.py @@ -27,15 +27,17 @@ def _watch(_stop_event: Event | None = None, _start_event: Event | None = None): def stop() -> bool: return _stop_event is not None and _stop_event.is_set() + log.debug("Running daemon") run_sync() - if _start_event: + if _start_event is not None: + log.debug("Setting start event") _start_event.set() dirs = "." 
# find_watch_dirs() - + for changes in watchfiles.watch(dirs, stop_event=_stop_event, watch_filter=watch_filter): - log.debug(changes) + log.debug(changes) run_sync() diff --git a/entangled/io/filedb.py b/entangled/io/filedb.py index a9cbdb0..4eb7824 100644 --- a/entangled/io/filedb.py +++ b/entangled/io/filedb.py @@ -62,7 +62,7 @@ def create_target(self, fs: AbstractFileCache, path: Path): def update(self, fs: AbstractFileCache, path: Path): if path.is_absolute(): path = path.relative_to(Path.cwd()) - if path in self.files: + if path in fs: self.files[path.as_posix()] = fs[path].stat def __contains__(self, path: Path) -> bool: diff --git a/test/commands/test_daemon.py b/test/commands/test_daemon.py index 5f4b1bf..6348184 100644 --- a/test/commands/test_daemon.py +++ b/test/commands/test_daemon.py @@ -35,8 +35,7 @@ def wait_for_stat_diff(md_stat, filename, timeout=5): return False -@pytest.mark.skip -@pytest.mark.timeout(30) +@pytest.mark.timeout(5) def test_daemon(tmp_path: Path): with chdir(tmp_path): configure(debug=True) diff --git a/test/io/test_transaction.py b/test/io/test_transaction.py index 816847d..4e6d43d 100644 --- a/test/io/test_transaction.py +++ b/test/io/test_transaction.py @@ -24,6 +24,7 @@ def test_transaction(tmp_path: Path): _ = f.write("ciao") fs.reset() + print(Path(".entangled/filedb.json").read_text()) with filedb() as db: assert Path("a") in db assert Path("b") in db From becc53202fc2286f37d4a8761d69afa5628df6a1 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Thu, 18 Dec 2025 15:01:57 +0100 Subject: [PATCH 6/8] add common path characters to legal reference name; fix stitching with namespaces --- entangled/model/tangle.py | 6 +++++- entangled/readers/code.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/entangled/model/tangle.py b/entangled/model/tangle.py index 262736f..ff4ea7b 100644 --- a/entangled/model/tangle.py +++ b/entangled/model/tangle.py @@ -12,12 +12,15 @@ from ..iterators.lines import lines from 
..errors.user import UserError from ..text_location import TextLocation +from ..logging import logger from .reference_map import ReferenceMap from .reference_id import ReferenceId from .reference_name import ReferenceName +log = logger() + @dataclass class CyclicReference(UserError): ref_name: str @@ -86,8 +89,9 @@ def tangler( with visitor.visit(ref): for line in lines(code_block.source): - if m := re.match(r"^(?P<indent>\s*)<<(?P<refname>[\w:-]+)>>\s*$", line.rstrip()): + if m := re.match(r"^(?P<indent>\s*)<<(?P<refname>[\w:/_.-]+)>>\s*$", line.rstrip()): ref_name = ReferenceName.from_str(m["refname"], code_block.namespace) + log.debug(f"tangling reference `{ref_name}`") if not refs.has_name(ref_name): raise MissingReference(code_block.origin, ref_name) ref_lst = refs.select_by_name(ref_name) diff --git a/entangled/readers/code.py b/entangled/readers/code.py index 62ef285..2d44cf7 100644 --- a/entangled/readers/code.py +++ b/entangled/readers/code.py @@ -99,7 +99,7 @@ def read_block(namespace: tuple[str, ...], indent: str, input: InputStream) -> G if block_data.is_init: extra_indent = block_data.indent.removeprefix(indent) ref = block_data.ref - ref_str = ref.name if ref.name.namespace == namespace else str(ref.name) + ref_str = ref.name.name if ref.name.namespace == namespace else str(ref.name) return f"{extra_indent}<<{ref_str}>>\n" else: return "" From 121d97f5bdefe2e6a68791571dde5b0eb0f473a4 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Thu, 18 Dec 2025 15:08:35 +0100 Subject: [PATCH 7/8] up version --- pyproject.toml | 2 +- uv.lock | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fca1ed3..a5d6466 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "entangled-cli" -version = "2.4.1.dev0" +version = "2.4.1" description = "Literate Programming toolbox" authors = [{ name = "Johan Hidding", email = "j.hidding@esciencecenter.nl" }] requires-python = ">=3.12,<4" diff --git a/uv.lock b/uv.lock index 
514fd07..a324bb5 100644 --- a/uv.lock +++ b/uv.lock @@ -302,7 +302,7 @@ wheels = [ [[package]] name = "entangled-cli" -version = "2.4.1.dev0" +version = "2.4.1" source = { editable = "." } dependencies = [ { name = "brei" }, From 011ff65582aae81a6196913c4203504752243104 Mon Sep 17 00:00:00 2001 From: Johan Hidding Date: Thu, 18 Dec 2025 15:28:45 +0100 Subject: [PATCH 8/8] use match method instead of full_match --- entangled/config/__init__.py | 2 +- entangled/io/virtual.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/entangled/config/__init__.py b/entangled/config/__init__.py index b95447d..f287e55 100644 --- a/entangled/config/__init__.py +++ b/entangled/config/__init__.py @@ -84,7 +84,7 @@ def get_input_files(fs: AbstractFileCache, cfg: Config) -> list[Path]: """ log.debug("watch list: %s; ignoring: %s", cfg.watch_list, cfg.ignore_list) input_file_list = filter( - lambda p: not any(p.full_match(pat) for pat in cfg.ignore_list), + lambda p: not any(p.match(pat) for pat in cfg.ignore_list), chain.from_iterable(map(fs.glob, cfg.watch_list))) log.debug("input file list %s", input_file_list) return sorted(input_file_list) diff --git a/entangled/io/virtual.py b/entangled/io/virtual.py index ce6821c..22c1a25 100644 --- a/entangled/io/virtual.py +++ b/entangled/io/virtual.py @@ -83,7 +83,7 @@ def __delitem__(self, key: Path): del self._data[key] def glob(self, pattern: str) -> Iterable[Path]: - return filter(lambda p: p.full_match(pattern), self._data.keys()) + return filter(lambda p: p.match(pattern), self._data.keys()) @override def write(self, key: Path, content: str, mode: int | None = None):