From 023cb5ddfd2895379c182be8757486c2596da9bc Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 19 Nov 2025 14:26:08 -0700 Subject: [PATCH 01/29] Update copilot instructions --- .github/copilot-instructions.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index b3777f3e..1173e410 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -102,6 +102,14 @@ Use `pixi add --feature python-dev ` to add a dependency that is only u - Unit tests target granular modules (`tests/python/unit/...`). Add new tests adjacent to similar domain (e.g., new utility → `tests/python/unit/utilities/`). - Integration tests at `tests/python/integration` cover full pipelines. - Coverage thresholds enforced (`--cov-fail-under=30` for unit suite). Keep defensive code minimal; exclude per coverage config if necessary. +- All python test files (e.g. ``test_scenario.py``) should end with the following block of code: + + .. code-block:: python + + if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) + + This allows the (single) file to be executed, running only the tests contained within, which is extremely useful when updating/modifying/adding tests in the file. - Rust tests live in crate `src` using standard Cargo conventions; prefer small, deterministic tests. ## 9. Logging & Observability From 7d4820a1655a0de72b06fabdc66e7b55f7b76c88 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 19 Nov 2025 14:54:31 -0700 Subject: [PATCH 02/29] Add missing line --- tests/python/unit/test_pb.py | 7 ++++++- tests/python/unit/test_version.py | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/python/unit/test_pb.py b/tests/python/unit/test_pb.py index af0ed08a..8986b0a7 100644 --- a/tests/python/unit/test_pb.py +++ b/tests/python/unit/test_pb.py @@ -1,7 +1,8 @@ """Tests for `compass.pb` progress bar helpers""" -from contextlib import ExitStack from io import StringIO +from pathlib import Path +from contextlib import ExitStack import pytest from rich.console import Console @@ -499,3 +500,7 @@ async def test_compass_website_crawl_prog_bar_duplicate( def test_singleton_instance_accessible(console): """Expose singleton progress bar instance""" assert isinstance(compass.pb.COMPASS_PB, compass.pb._COMPASSProgressBars) + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/test_version.py b/tests/python/unit/test_version.py index 5d0f57d0..e24f8c8b 100644 --- a/tests/python/unit/test_version.py +++ b/tests/python/unit/test_version.py @@ -6,6 +6,9 @@ """ import re +from pathlib import Path + +import pytest import compass @@ -28,3 +31,7 @@ def test_version_semantic_shape(): ) assert v != "9999", "Version set to placeholder" assert not v.startswith("10000"), "Version set to placeholder" + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From 634b08e353983c154e629747e9faa6c46ca36d47 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 19 Nov 2025 16:02:26 -0700 Subject: [PATCH 03/29] Add logging tests --- tests/python/unit/scripts/test_process.py | 142 ++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 tests/python/unit/scripts/test_process.py diff --git a/tests/python/unit/scripts/test_process.py b/tests/python/unit/scripts/test_process.py new file mode 100644 index 00000000..9256f524 --- /dev/null +++ b/tests/python/unit/scripts/test_process.py @@ -0,0 +1,142 @@ +"""Tests for compass.scripts.process""" + +import logging +from pathlib import Path + +import pytest + +from compass.exceptions import COMPASSValueError +import compass.scripts.process as process_module +from compass.scripts.process import ( + _COMPASSRunner, + process_jurisdictions_with_openai, +) +from compass.utilities import ProcessKwargs + + +@pytest.fixture +def testing_log_file(tmp_path): + """Logger fixture for testing""" + log_fp = tmp_path / "test.log" + handler = logging.FileHandler(log_fp, encoding="utf-8") + logger = logging.getLogger("compass") + prev_level = logger.level + prev_propagate = logger.propagate + logger.setLevel(logging.ERROR) + logger.propagate = False + logger.addHandler(handler) + + yield log_fp + + handler.flush() + logger.removeHandler(handler) + handler.close() + logger.setLevel(prev_level) + logger.propagate = prev_propagate + + +def test_known_local_docs_missing_file(tmp_path): + """Raise when known_local_docs points to missing config""" + missing_fp = tmp_path / "does_not_exist.json" + runner = _COMPASSRunner( + dirs=None, + log_listener=None, + tech="solar", + models={}, + process_kwargs=ProcessKwargs(str(missing_fp), None), + ) + + with pytest.raises(COMPASSValueError, match="Config file does not exist"): + _ = runner.known_local_docs + + +def test_known_local_docs_logs_missing_file(tmp_path, testing_log_file): + """Log missing known_local_docs config to error file""" + + missing_fp = tmp_path / "does_not_exist.json" + runner = _COMPASSRunner( + dirs=None, + log_listener=None, + tech="solar", + models={}, + process_kwargs=ProcessKwargs(str(missing_fp), None), + ) + + with pytest.raises(COMPASSValueError, match="Config file does not exist"): + _ = runner.known_local_docs + + assert testing_log_file.exists() + assert "Config file does not exist" in testing_log_file.read_text( + encoding="utf-8" + ) + + +@pytest.mark.asyncio +async def test_duplicate_tasks_logs_to_file(tmp_path): + """Log duplicate LLM tasks to error file""" + + jurisdiction_fp = tmp_path / "jurisdictions.csv" + jurisdiction_fp.touch() + + with pytest.raises(COMPASSValueError, match="Found duplicated task"): + _ = await process_jurisdictions_with_openai( + out_dir=tmp_path / "outputs", + tech="solar", + jurisdiction_fp=jurisdiction_fp, + model=[ + { + "name": "gpt-4.1-mini", + "tasks": ["default", "date_extraction"], + }, + { + "name": "gpt-4.1", + "tasks": [ + "ordinance_text_extraction", + "permitted_use_text_extraction", + "date_extraction", + ], + }, + ], + ) + + log_files = list((tmp_path / "outputs" / "logs").glob("*")) + assert len(log_files) == 1 + assert "Fatal error during processing" not in log_files[0].read_text( + encoding="utf-8" + ) + assert "Found duplicated task" in log_files[0].read_text(encoding="utf-8") + + +@pytest.mark.asyncio +async def test_external_exceptions_logged_to_file(tmp_path, monkeypatch): + """Log external exceptions to error file""" + + def _always_fail(*__, **___): + raise NotImplementedError("Simulated external error") + + monkeypatch.setattr( + process_module, "_initialize_model_params", _always_fail + ) + + jurisdiction_fp = tmp_path / "jurisdictions.csv" + jurisdiction_fp.touch() + + with pytest.raises(NotImplementedError, match="Simulated external error"): + _ = await process_jurisdictions_with_openai( + out_dir=tmp_path / "outputs", + tech="solar", + jurisdiction_fp=jurisdiction_fp, + ) + + log_files = list((tmp_path / "outputs" / "logs").glob("*")) + assert len(log_files) == 1 + assert "Fatal error during processing" in log_files[0].read_text( + encoding="utf-8" + ) + assert "Simulated external error" in log_files[0].read_text( + encoding="utf-8" + ) + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From e3289a398270c339642ee494fa9050242c7f154a Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 19 Nov 2025 16:03:00 -0700 Subject: [PATCH 04/29] Adjust main script to catch and log errors as early as possible --- compass/scripts/process.py | 68 +++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index c6dd6eba..af17f1c4 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -21,7 +21,7 @@ download_jurisdiction_ordinances_from_website_compass_crawl, filter_ordinance_docs, ) -from compass.exceptions import COMPASSValueError +from compass.exceptions import COMPASSValueError, COMPASSError from compass.extraction import ( extract_ordinance_values, extract_ordinance_text_with_ngram_validation, @@ -457,38 +457,44 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 ofd=ordinance_file_dir, jdd=jurisdiction_dbs_dir, ) - pk = ProcessKwargs( - known_local_docs, - known_doc_urls, - file_loader_kwargs, - td_kwargs, - tpe_kwargs, - ppe_kwargs, - max_num_concurrent_jurisdictions, - ) - wsp = WebSearchParams( - num_urls_to_check_per_jurisdiction, - max_num_concurrent_browsers, - max_num_concurrent_website_searches, - url_ignore_substrings, - pytesseract_exe_fp, - search_engines, - ) - models = _initialize_model_params(model) - runner = _COMPASSRunner( - dirs=dirs, - log_listener=log_listener, - tech=tech, - models=models, - web_search_params=wsp, - process_kwargs=pk, - perform_se_search=perform_se_search, - perform_website_search=perform_website_search, - log_level=log_level, - ) async with log_listener as ll: _setup_main_logging(dirs.logs, log_level, ll, keep_async_logs) - return await runner.run(jurisdiction_fp) + try: + pk = ProcessKwargs( + known_local_docs, + known_doc_urls, + file_loader_kwargs, + td_kwargs, + tpe_kwargs, + ppe_kwargs, + max_num_concurrent_jurisdictions, + ) + wsp = WebSearchParams( + num_urls_to_check_per_jurisdiction, + max_num_concurrent_browsers, + max_num_concurrent_website_searches, + url_ignore_substrings, + pytesseract_exe_fp, + search_engines, + ) + models = _initialize_model_params(model) + runner = _COMPASSRunner( + dirs=dirs, + log_listener=log_listener, + tech=tech, + models=models, + web_search_params=wsp, + process_kwargs=pk, + perform_se_search=perform_se_search, + perform_website_search=perform_website_search, + log_level=log_level, + ) + return await runner.run(jurisdiction_fp) + except COMPASSError: + raise + except Exception: + logger.exception("Fatal error during processing") + raise class _COMPASSRunner: From 423e25eb4de657631a3ea2cde0de6b4b7a0defa8 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 19 Nov 2025 17:24:27 -0700 Subject: [PATCH 05/29] Add `log_versions` function --- compass/utilities/logs.py | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/compass/utilities/logs.py b/compass/utilities/logs.py index e7dd6da7..c76ec9ed 100644 --- a/compass/utilities/logs.py +++ b/compass/utilities/logs.py @@ -14,7 +14,9 @@ from queue import SimpleQueue from functools import partial, partialmethod from logging.handlers import QueueHandler, QueueListener +from importlib.metadata import version, PackageNotFoundError +from compass import __version__ from compass.exceptions import COMPASSValueError @@ -478,6 +480,36 @@ def _get_existing_records(self): return records +def log_versions(logger): + """Log COMPASS and dependency package versions + + Parameters + ---------- + logger : logging.Logger + Logger object to log memory message to. + """ + + logger.info("Running COMPASS version %s", __version__) + packages_to_log = [ + "NREL-ELM", + "openai", + "playwright", + "tf-playwright-stealth", + "rebrowser-playwright", + "camoufox", + "pdftotext", + "pytesseract", + "langchain-text-splitters", + "crawl4ai", + "nltk", + "networkx", + "pandas", + "numpy", + ] + for pkg in packages_to_log: + logger.debug_to_file("- %s version: %s", pkg, _get_version(pkg)) + + def _setup_logging_levels(): """Setup COMPASS logging levels""" logging.TRACE = 5 @@ -491,3 +523,11 @@ def _setup_logging_levels(): logging.Logger.log, logging.DEBUG_TO_FILE ) logging.debug_to_file = partial(logging.log, logging.DEBUG_TO_FILE) + + +def _get_version(pkg_name): + """Get the version string for a package""" + try: + return version(pkg_name) + except PackageNotFoundError: + return "not installed" From e711e61b7d1a8bddf0ee5e41442b995e2f90c2b0 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 19 Nov 2025 17:25:21 -0700 Subject: [PATCH 06/29] Use new logging function --- compass/scripts/process.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index af17f1c4..2840479b 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -1,6 +1,7 @@ """Ordinance full processing logic""" import time +import json import asyncio import logging from copy import deepcopy @@ -11,7 +12,6 @@ import pandas as pd from elm.web.utilities import get_redirected_url -from compass import __version__ from compass.scripts.download import ( find_jurisdiction_website, download_known_urls, @@ -101,6 +101,7 @@ LocationFileLog, LogListener, NoLocationFilter, + log_versions, ) from compass.utilities.base import WebSearchParams from compass.utilities.parsing import load_config @@ -445,6 +446,7 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 and may include color-coded cost information if the terminal supports it. """ + called_args = locals() if log_level == "DEBUG": log_level = "DEBUG_TO_FILE" @@ -459,6 +461,7 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 ) async with log_listener as ll: _setup_main_logging(dirs.logs, log_level, ll, keep_async_logs) + _log_exec_info(called_args) try: pk = ProcessKwargs( known_local_docs, @@ -676,7 +679,6 @@ async def run(self, jurisdiction_fp): terminal and may include color-coded cost information if the terminal supports it. """ - logger.info("Running COMPASS version %s", __version__) jurisdictions = _load_jurisdictions_to_process(jurisdiction_fp) num_jurisdictions = len(jurisdictions) @@ -1384,6 +1386,15 @@ def _setup_main_logging(log_dir, level, listener, keep_async_logs): listener.addHandler(handler) +def _log_exec_info(called_args): + """Log versions and function parameters to file""" + log_versions(logger) + logger.debug_to_file( + "Called 'process_jurisdictions_with_openai' with:\n%s", + json.dumps(called_args, indent=4), + ) + + def _setup_folders(out_dir, log_dir=None, clean_dir=None, ofd=None, jdd=None): """Setup output directory folders""" dirs = Directories(out_dir, log_dir, clean_dir, ofd, jdd) From b3764e5d03128d2156c30d7509efc25806cafe30 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 10:43:10 -0700 Subject: [PATCH 07/29] Add test for `log_versions` --- .../unit/utilities/test_utilities_logs.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/python/unit/utilities/test_utilities_logs.py b/tests/python/unit/utilities/test_utilities_logs.py index 23defee7..d1889375 100644 --- a/tests/python/unit/utilities/test_utilities_logs.py +++ b/tests/python/unit/utilities/test_utilities_logs.py @@ -16,6 +16,7 @@ LocationFileLog, LocationFilter, LogListener, + log_versions, NoLocationFilter, _JsonExceptionFileHandler, _JsonFormatter, @@ -37,6 +38,18 @@ def _speed_up_location_file_log_async_exit(): LocationFileLog.ASYNC_EXIT_SLEEP_SECONDS = original_sleep +@pytest.fixture(scope="module") +def compass_logger(): + """Provide compass logger with DEBUG_TO_FILE level for tests""" + logger = logging.getLogger("compass") + prev_level = logger.level + logger.setLevel("DEBUG_TO_FILE") + try: + yield logger + finally: + logger.setLevel(prev_level) + + class _DummyListener: def __init__(self): self.added_handlers = [] @@ -623,6 +636,34 @@ def test_local_process_queue_handler_emit(): assert queued_record.msg == "test message" +def test_log_versions_logs_expected_packages( + compass_logger, assert_message_was_logged +): + """Test log_versions emits entries for each tracked package""" + + log_versions(compass_logger) + + expected_packages = [ + "NREL-ELM", + "openai", + "playwright", + "tf-playwright-stealth", + "rebrowser-playwright", + "camoufox", + "pdftotext", + "pytesseract", + "langchain-text-splitters", + "crawl4ai", + "nltk", + "networkx", + "pandas", + "numpy", + ] + assert_message_was_logged("Running COMPASS version", log_level="INFO") + for pkg in expected_packages: + assert_message_was_logged(pkg, log_level="DEBUG_TO_FILE") + + def test_log_listener_context_manager(): """Test LogListener as a context manager""" logger_name = "test_listener_logger" From 7c65d03819fb59685227d8342d556290a16f5543 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 11:14:18 -0700 Subject: [PATCH 08/29] Add test for logging parameters to function --- tests/python/unit/scripts/test_process.py | 50 +++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/tests/python/unit/scripts/test_process.py b/tests/python/unit/scripts/test_process.py index 9256f524..5bb042b4 100644 --- a/tests/python/unit/scripts/test_process.py +++ b/tests/python/unit/scripts/test_process.py @@ -138,5 +138,55 @@ def _always_fail(*__, **___): ) +@pytest.mark.asyncio +async def test_process_args_logged_at_debug_to_file( + tmp_path, monkeypatch, assert_message_was_logged +): + """Log function arguments with DEBUG_TO_FILE level""" + + class DummyRunner: + """Minimal runner that bypasses full processing""" + + def __init__(self, **_): + pass + + async def run(self, jurisdiction_fp): + return f"processed {jurisdiction_fp}" + + monkeypatch.setattr(process_module, "_COMPASSRunner", DummyRunner) + + out_dir = tmp_path / "outputs" + jurisdiction_fp = tmp_path / "jurisdictions.csv" + jurisdiction_fp.touch() + + result = await process_jurisdictions_with_openai( + out_dir=str(out_dir), + tech="solar", + jurisdiction_fp=str(jurisdiction_fp), + log_level="DEBUG", + ) + + assert result == f"processed {jurisdiction_fp}" + + assert_message_was_logged( + "Called 'process_jurisdictions_with_openai' with:", + log_level="DEBUG_TO_FILE", + ) + assert_message_was_logged('"out_dir": ', log_level="DEBUG_TO_FILE") + assert_message_was_logged(str(out_dir), log_level="DEBUG_TO_FILE") + assert_message_was_logged('"tech": "solar"', log_level="DEBUG_TO_FILE") + assert_message_was_logged('"jurisdiction_fp": ', log_level="DEBUG_TO_FILE") + assert_message_was_logged(str(jurisdiction_fp), log_level="DEBUG_TO_FILE") + assert_message_was_logged( + '"log_level": "DEBUG"', log_level="DEBUG_TO_FILE" + ) + assert_message_was_logged( + '"model": "gpt-4o-mini"', log_level="DEBUG_TO_FILE" + ) + assert_message_was_logged( + '"keep_async_logs": false', log_level="DEBUG_TO_FILE" + ) + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From d76191e46920f17509ee7b2601db181c55407c5f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 11:14:28 -0700 Subject: [PATCH 09/29] Add conversion for `Path` params --- compass/scripts/process.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index 2840479b..ee933f00 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -4,6 +4,7 @@ import json import asyncio import logging +from pathlib import Path from copy import deepcopy from functools import cached_property from contextlib import AsyncExitStack, contextmanager @@ -1389,6 +1390,10 @@ def _setup_main_logging(log_dir, level, listener, keep_async_logs): def _log_exec_info(called_args): """Log versions and function parameters to file""" log_versions(logger) + for name, param in called_args.items(): + if isinstance(param, Path): + called_args[name] = str(param) + logger.debug_to_file( "Called 'process_jurisdictions_with_openai' with:\n%s", json.dumps(called_args, indent=4), From e9cbcfb5681175291727b01d5536bd7aa3e8a1b4 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 11:27:53 -0700 Subject: [PATCH 10/29] Add log about processing steps + test --- compass/scripts/process.py | 44 +++++++++++++++-- tests/python/unit/scripts/test_process.py | 60 ++++++++++++++++++----- 2 files changed, 89 insertions(+), 15 deletions(-) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index ee933f00..dacdeb7a 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -462,7 +462,13 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 ) async with log_listener as ll: _setup_main_logging(dirs.logs, log_level, ll, keep_async_logs) - _log_exec_info(called_args) + steps = _check_enabled_steps( + known_local_docs=known_local_docs, + known_doc_urls=known_doc_urls, + perform_se_search=perform_se_search, + perform_website_search=perform_website_search, + ) + _log_exec_info(called_args, steps) try: pk = ProcessKwargs( known_local_docs, @@ -1387,19 +1393,51 @@ def _setup_main_logging(log_dir, level, listener, keep_async_logs): listener.addHandler(handler) -def _log_exec_info(called_args): +def _log_exec_info(called_args, steps): """Log versions and function parameters to file""" log_versions(logger) + + logger.info( + "Using the following processing steps:\n\t\t%s", " -> ".join(steps) + ) + for name, param in called_args.items(): if isinstance(param, Path): called_args[name] = str(param) - logger.debug_to_file( "Called 'process_jurisdictions_with_openai' with:\n%s", json.dumps(called_args, indent=4), ) +def _check_enabled_steps( + known_local_docs=None, + known_doc_urls=None, + perform_se_search=True, + perform_website_search=True, +): + """Check that at least one processing step is enabled""" + steps = [] + if known_local_docs: + steps.append("Check local document") + if known_doc_urls: + steps.append("Check known document URL") + if perform_se_search: + steps.append("Look for document using search engine") + if perform_website_search: + steps.append("Look for document on jurisdiction website") + + if not steps: + msg = ( + "No processing steps enabled! Please provide at least one of " + "'known_local_docs', 'known_doc_urls', or set at least one of " + "'perform_se_search' or 'perform_website_search' to True." + ) + raise COMPASSValueError(msg) + + return steps + + def _setup_folders(out_dir, log_dir=None, clean_dir=None, ofd=None, jdd=None): """Setup output directory folders""" dirs = Directories(out_dir, log_dir, clean_dir, ofd, jdd) diff --git a/tests/python/unit/scripts/test_process.py b/tests/python/unit/scripts/test_process.py index 5bb042b4..0ebf8e33 100644 --- a/tests/python/unit/scripts/test_process.py +++ b/tests/python/unit/scripts/test_process.py @@ -35,6 +35,22 @@ def testing_log_file(tmp_path): logger.propagate = prev_propagate +@pytest.fixture +def patched_runner(monkeypatch): + """Patch the COMPASSRunner to a dummy that bypasses processing""" + + class DummyRunner: + """Minimal runner that bypasses full processing""" + + def __init__(self, **_): + pass + + async def run(self, jurisdiction_fp): + return f"processed {jurisdiction_fp}" + + monkeypatch.setattr(process_module, "_COMPASSRunner", DummyRunner) + + def test_known_local_docs_missing_file(tmp_path): """Raise when known_local_docs points to missing config""" missing_fp = tmp_path / "does_not_exist.json" @@ -140,21 +156,10 @@ def _always_fail(*__, **___): @pytest.mark.asyncio async def test_process_args_logged_at_debug_to_file( - tmp_path, monkeypatch, assert_message_was_logged + tmp_path, patched_runner, assert_message_was_logged ): """Log function arguments with DEBUG_TO_FILE level""" - class DummyRunner: - """Minimal runner that bypasses full processing""" - - def __init__(self, **_): - pass - - async def run(self, jurisdiction_fp): - return f"processed {jurisdiction_fp}" - - monkeypatch.setattr(process_module, "_COMPASSRunner", DummyRunner) - out_dir = tmp_path / "outputs" jurisdiction_fp = tmp_path / "jurisdictions.csv" jurisdiction_fp.touch() @@ -188,5 +193,36 @@ async def run(self, jurisdiction_fp): ) +@pytest.mark.asyncio +async def test_process_steps_logged( + tmp_path, patched_runner, assert_message_was_logged +): + """Log function arguments with DEBUG_TO_FILE level""" + + out_dir = tmp_path / "outputs" + jurisdiction_fp = tmp_path / "jurisdictions.csv" + jurisdiction_fp.touch() + + result = await process_jurisdictions_with_openai( + out_dir=str(out_dir), + tech="solar", + jurisdiction_fp=str(jurisdiction_fp), + log_level="DEBUG", + ) + + assert result == f"processed {jurisdiction_fp}" + + assert_message_was_logged( + "Using the following processing steps:", log_level="INFO" + ) + assert_message_was_logged( + ( + "Look for document using search engine " + "-> Look for document on jurisdiction website" + ), + log_level="INFO", + ) + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From 4ee165445fe2f3006535fcaf08a5ac9e7d25bc5a Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 11:29:38 -0700 Subject: [PATCH 11/29] Add test for error being thrown --- tests/python/unit/scripts/test_process.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tests/python/unit/scripts/test_process.py b/tests/python/unit/scripts/test_process.py index 0ebf8e33..808b53ec 100644 --- a/tests/python/unit/scripts/test_process.py +++ b/tests/python/unit/scripts/test_process.py @@ -224,5 +224,26 @@ async def test_process_steps_logged( ) +@pytest.mark.asyncio +async def test_process_error_for_no_steps(tmp_path, patched_runner): + """Log function arguments with DEBUG_TO_FILE level""" + + out_dir = tmp_path / "outputs" + jurisdiction_fp = tmp_path / "jurisdictions.csv" + jurisdiction_fp.touch() + + with pytest.raises(COMPASSValueError, match="No processing steps enabled"): + await process_jurisdictions_with_openai( + out_dir=str(out_dir), + tech="solar", + jurisdiction_fp=str(jurisdiction_fp), + log_level="DEBUG", + known_local_docs=None, + known_doc_urls=None, + perform_se_search=False, + perform_website_search=False, + ) + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From 73bd2b0549a8a638d66d590f6d0fa3c42609422d Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 11:56:26 -0700 Subject: [PATCH 12/29] Add new func --- compass/utilities/parsing.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index 1bb4284a..bea97f0d 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -236,3 +236,21 @@ def load_config(config_fp): f"{config_fp.suffix}. Supported extensions are .json5 and .json." ) raise COMPASSValueError(msg) + + +def convert_paths_to_strings(obj): + """Convert Path instances within nested structures to strings""" + if isinstance(obj, Path): + return str(obj) + if isinstance(obj, dict): + return { + convert_paths_to_strings(key): convert_paths_to_strings(value) + for key, value in obj.items() + } + if isinstance(obj, list): + return [convert_paths_to_strings(item) for item in obj] + if isinstance(obj, tuple): + return tuple(convert_paths_to_strings(item) for item in obj) + if isinstance(obj, set): + return {convert_paths_to_strings(item) for item in obj} + return obj From c713e1aa159fdaa77da818113a9a8437a7a7e75c Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 11:56:54 -0700 Subject: [PATCH 13/29] Use new utility func --- compass/scripts/process.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index dacdeb7a..c8fedbfa 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -4,7 +4,6 @@ import json import asyncio import logging -from pathlib import Path from copy import deepcopy from functools import cached_property from contextlib import AsyncExitStack, contextmanager @@ -105,7 +104,7 @@ log_versions, ) from compass.utilities.base import WebSearchParams -from compass.utilities.parsing import load_config +from compass.utilities.parsing import load_config, convert_paths_to_strings from compass.pb import COMPASS_PB @@ -1401,12 +1400,10 @@ def _log_exec_info(called_args, steps): "Using the following processing steps:\n\t\t%s", " -> ".join(steps) ) - for name, param in called_args.items(): - if isinstance(param, Path): - called_args[name] = str(param) + normalized_args = convert_paths_to_strings(called_args) logger.debug_to_file( "Called 'process_jurisdictions_with_openai' with:\n%s", - json.dumps(called_args, indent=4), + json.dumps(normalized_args, indent=4), ) From 6417565b884e98f641d8d84d5b1c1fea2c350e25 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 11:58:02 -0700 Subject: [PATCH 14/29] Log start and end of processing --- compass/scripts/process.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index c8fedbfa..88229ca2 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -885,6 +885,10 @@ async def run(self): """Download and parse document for a single jurisdiction""" start_time = time.monotonic() doc = None + logger.info( + "Kicking off processing for jurisdiction: %s", + self.jurisdiction.full_name, + ) try: doc = await self._run() finally: @@ -892,6 +896,10 @@ async def run(self): await _record_jurisdiction_info( self.jurisdiction, doc, start_time, self.usage_tracker ) + logger.info( + "Completed processing for jurisdiction: %s", + self.jurisdiction.full_name, + ) return doc From 27fbf38f422b911b5fddfe1b05dc9fb5960bb6df Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 12:05:31 -0700 Subject: [PATCH 15/29] Configure docs to ignore funcs with docstrings that start with `[NOT PUBLIC API]` --- compass/__init__.py | 4 +- compass/utilities/logs.py | 4 +- compass/utilities/parsing.py | 2 +- docs/source/conf.py | 56 +++++++++++-------- .../unit/utilities/test_utilities_logs.py | 6 +- 5 files changed, 42 insertions(+), 30 deletions(-) diff --git a/compass/__init__.py b/compass/__init__.py index 4f75f231..28cb2282 100644 --- a/compass/__init__.py +++ b/compass/__init__.py @@ -1,6 +1,6 @@ """Ordinance document download and structured data extraction""" from ._version import __version__ -from .utilities.logs import _setup_logging_levels, COMPASS_DEBUG_LEVEL +from .utilities.logs import setup_logging_levels, COMPASS_DEBUG_LEVEL -_setup_logging_levels() +setup_logging_levels() diff --git a/compass/utilities/logs.py b/compass/utilities/logs.py index c76ec9ed..21ac84c7 100644 --- a/compass/utilities/logs.py +++ b/compass/utilities/logs.py @@ -510,8 +510,8 @@ def log_versions(logger): logger.debug_to_file("- %s version: %s", pkg, _get_version(pkg)) -def _setup_logging_levels(): - """Setup COMPASS logging levels""" +def setup_logging_levels(): + """[NOT PUBLIC API] Setup COMPASS logging levels""" logging.TRACE = 5 logging.addLevelName(logging.TRACE, "TRACE") logging.Logger.trace = partialmethod(logging.Logger.log, logging.TRACE) diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index bea97f0d..4b5da44c 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -239,7 +239,7 @@ def load_config(config_fp): def convert_paths_to_strings(obj): - """Convert Path instances within nested structures to strings""" + """[NOT PUBLIC API] Convert all Path instances to strings""" if isinstance(obj, Path): return str(obj) if isinstance(obj, dict): diff --git a/docs/source/conf.py b/docs/source/conf.py index 2f962fe2..fdcb32bb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -209,26 +209,8 @@ ] -def skip_external_methods(app, what, name, obj, skip, options): - if name in { - "clear", - "pop", - "popitem", - "setdefault", - "update", - } and "MutableMapping" in str(obj): - return True - - if name in {"copy", "fromkeys"} and "UsageTracker" in str(obj): - return True - - if name in {"items", "keys", "values"} and "Mapping" in str(obj): - return True - - if name in {"copy", "get"} and "UserDict" in str(obj): - return True - - if name in { +def _skip_pydantic_methods(name, obj): + return name in { "model_dump_json", "model_json_schema", "model_dump", @@ -254,14 +236,44 @@ def skip_external_methods(app, what, name, obj, skip, options): "schema_json", "update_forward_refs", "validate", - } and "BaseModel" in str(obj): + } and "BaseModel" in str(obj) + + +def _skip_builtin_methods(name, obj): + if name in { + "clear", + "pop", + "popitem", + "setdefault", + "update", + } and "MutableMapping" in str(obj): + return True + + if name in {"items", "keys", "values"} and "Mapping" in str(obj): return True + return name in {"copy", "get"} and "UserDict" in str(obj) + + +def _skip_internal_api(name, obj): + if (getattr(obj, "__doc__", None) or "").startswith("[NOT PUBLIC API]"): + return True + + return name in {"copy", "fromkeys"} and "UsageTracker" in str(obj) + + +def _skip_member(app, what, name, obj, skip, options): + if ( + _skip_internal_api(name, obj) + or _skip_builtin_methods(name, obj) + or _skip_pydantic_methods(name, obj) + ): + return True return None def setup(app): - app.connect("autodoc-skip-member", skip_external_methods) + app.connect("autodoc-skip-member", _skip_member) # -- Extension configuration ------------------------------------------------- diff --git a/tests/python/unit/utilities/test_utilities_logs.py b/tests/python/unit/utilities/test_utilities_logs.py index d1889375..b5112c1b 100644 --- a/tests/python/unit/utilities/test_utilities_logs.py +++ b/tests/python/unit/utilities/test_utilities_logs.py @@ -18,10 +18,10 @@ LogListener, log_versions, NoLocationFilter, + setup_logging_levels, _JsonExceptionFileHandler, _JsonFormatter, _LocalProcessQueueHandler, - _setup_logging_levels, LOGGING_QUEUE, ) @@ -597,8 +597,8 @@ def test_json_exception_file_handler_multiple_exceptions(tmp_path): def test_setup_logging_levels(): - """Test _setup_logging_levels adds custom logging levels""" - _setup_logging_levels() + """Test setup_logging_levels adds custom logging levels""" + setup_logging_levels() assert hasattr(logging, "TRACE") assert logging.TRACE == 5 From a8aca8c123c49b16a30a1c7f26219d8b2c6ea341 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 12:05:46 -0700 Subject: [PATCH 16/29] Adjust test to cover all permutations --- tests/python/unit/scripts/test_process.py | 94 +++++++++++++++-------- 1 file changed, 64 insertions(+), 30 deletions(-) diff --git a/tests/python/unit/scripts/test_process.py b/tests/python/unit/scripts/test_process.py index 808b53ec..12ad27f2 100644 --- a/tests/python/unit/scripts/test_process.py +++ b/tests/python/unit/scripts/test_process.py @@ -2,6 +2,7 @@ import logging from pathlib import Path +from itertools import product import pytest @@ -194,20 +195,80 @@ async def test_process_args_logged_at_debug_to_file( @pytest.mark.asyncio +@pytest.mark.parametrize( + ( + "has_known_local_docs", + "has_known_doc_urls", + "perform_se_search", + "perform_website_search", + ), + [ + pytest.param( + *flags, id=("local-{}_urls-{}_se-{}_web-{}".format(*flags)) + ) + for flags in product([False, True], repeat=4) + ], +) async def test_process_steps_logged( - tmp_path, patched_runner, assert_message_was_logged + tmp_path, + patched_runner, + assert_message_was_logged, + has_known_local_docs, + has_known_doc_urls, + perform_se_search, + perform_website_search, ): - """Log function arguments with DEBUG_TO_FILE level""" + """Log enabled processing steps for every combination of inputs""" out_dir = tmp_path / "outputs" jurisdiction_fp = tmp_path / "jurisdictions.csv" jurisdiction_fp.touch() + known_local_docs = None + if has_known_local_docs: + known_local_docs = {"1": [{"source_fp": tmp_path / "local_doc.pdf"}]} + + known_doc_urls = None + if has_known_doc_urls: + known_doc_urls = { + "1": [{"source": "https://example.com/ordinance.pdf"}] + } + + expected_steps = [] + if has_known_local_docs: + expected_steps.append("Check local document") + if has_known_doc_urls: + expected_steps.append("Check known document URL") + if perform_se_search: + expected_steps.append("Look for document using search engine") + if perform_website_search: + expected_steps.append("Look for document on jurisdiction website") + + if not expected_steps: + with pytest.raises( + COMPASSValueError, match="No processing steps enabled" + ): + await process_jurisdictions_with_openai( + out_dir=str(out_dir), + tech="solar", + jurisdiction_fp=str(jurisdiction_fp), + log_level="DEBUG", + known_local_docs=known_local_docs, + known_doc_urls=known_doc_urls, + perform_se_search=perform_se_search, + perform_website_search=perform_website_search, + ) + return + result = await process_jurisdictions_with_openai( out_dir=str(out_dir), tech="solar", jurisdiction_fp=str(jurisdiction_fp), log_level="DEBUG", + known_local_docs=known_local_docs, + known_doc_urls=known_doc_urls, + perform_se_search=perform_se_search, + perform_website_search=perform_website_search, ) assert result == f"processed {jurisdiction_fp}" @@ -215,34 +276,7 @@ async def test_process_steps_logged( assert_message_was_logged( "Using the following processing steps:", log_level="INFO" ) - assert_message_was_logged( - ( - "Look for document using search engine " - "-> Look for document on jurisdiction website" - ), - log_level="INFO", - ) - - -@pytest.mark.asyncio -async def test_process_error_for_no_steps(tmp_path, patched_runner): - """Log function arguments with DEBUG_TO_FILE level""" - - out_dir = tmp_path / "outputs" - jurisdiction_fp = tmp_path / "jurisdictions.csv" - jurisdiction_fp.touch() - - with pytest.raises(COMPASSValueError, match="No processing steps enabled"): - await process_jurisdictions_with_openai( - out_dir=str(out_dir), - tech="solar", - jurisdiction_fp=str(jurisdiction_fp), - log_level="DEBUG", - known_local_docs=None, - known_doc_urls=None, - perform_se_search=False, - perform_website_search=False, - ) + assert_message_was_logged(" -> ".join(expected_steps), log_level="INFO") if __name__ == "__main__": From ac14a65099403bba109eff23eda3bc2ee6c9c06f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 12:09:07 -0700 Subject: [PATCH 17/29] Document new filter functionality --- .github/copilot-instructions.md | 1 + docs/source/dev/README.rst | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index 1173e410..fd35f7ad 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -92,6 +92,7 @@ Use `pixi add --feature python-dev ` to add a dependency that is only u - Do not document parameters in the class docstring - do that in the __init__ docstring instead. - All @property and @cached_property method documentation should be one line long and should start with the return type. - "Protected" functions and methods should always be documented using only one-line summary docstrings. +- To exclude functions or classes from the public API documentation, start the docstring with the token ``[NOT PUBLIC API]``. ## 7. Coding Guidelines (Rust) - Workspace-managed deps; update root `Cargo.toml` if adding shared dependency. diff --git a/docs/source/dev/README.rst b/docs/source/dev/README.rst index 4b93c441..fa047306 100644 --- a/docs/source/dev/README.rst +++ b/docs/source/dev/README.rst @@ -177,7 +177,10 @@ As such, please adhere to these guidelines: Such code is subject to change at any time, so you should never rely on private/protected functionality unless you know what you are doing (in which case you should be relying on the function's code, not docstring). -5) Link any functions and/or classes that you reference in your docstring. +5) If you want to create a function or method meant to be used across the repository but **not** + be included as part of the public API (i.e. not have it included in the autogenerated Sphinx + documentation), start the docstring with the token ``[NOT PUBLIC API]``. +6) Link any functions and/or classes that you reference in your docstring. Sphinx allows interlinks between different sets of documentation, which can be a really convenient way for new users to learn more about the external libraries they are expected to use. For more information on how to set up links in your documentation, please see From b2ba6d889495f664bed7b46dc4869303c8af4086 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 12:11:11 -0700 Subject: [PATCH 18/29] Log each processing step --- compass/scripts/process.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index 88229ca2..829675a4 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -906,6 +906,10 @@ async def run(self): async def _run(self): """Search for docs and parse them for ordinances""" if self.known_local_docs: + logger.debug( + "Checking local docs for jurisdiction: %s", + self.jurisdiction.full_name, + ) doc = await self._try_find_ordinances( method=self._load_known_local_documents, ) @@ -913,6 +917,10 @@ async def _run(self): return doc if self.known_doc_urls: + logger.debug( + "Checking known URL's for jurisdiction: %s", + self.jurisdiction.full_name, + ) doc = await self._try_find_ordinances( method=self._download_known_url_documents, ) @@ -920,6 +928,11 @@ async def _run(self): return doc if self.perform_se_search: + logger.debug( + "Collecting documents using a search engine for " + "jurisdiction: %s", + self.jurisdiction.full_name, + ) doc = await self._try_find_ordinances( method=self._find_documents_using_search_engine, ) @@ -927,6 +940,10 @@ async def _run(self): return doc if self.perform_website_search: + logger.debug( + "Collecting documents from the jurisdiction website for: %s", + self.jurisdiction.full_name, + ) doc = await self._try_find_ordinances( method=self._find_documents_from_website, ) From b73cd9f29c126ebfaaf2e1e4c880476d4665be69 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 12:16:56 -0700 Subject: [PATCH 19/29] Minor style update --- compass/scripts/process.py | 2 +- tests/python/unit/scripts/test_process.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index 829675a4..e452adf9 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -1422,7 +1422,7 @@ def _log_exec_info(called_args, steps): log_versions(logger) logger.info( - "Using the following processing steps:\n\t\t%s", " -> ".join(steps) + "Using the following processing step(s):\n\t%s", " -> ".join(steps) ) normalized_args = convert_paths_to_strings(called_args) diff --git a/tests/python/unit/scripts/test_process.py b/tests/python/unit/scripts/test_process.py index 12ad27f2..7bf201b1 100644 --- a/tests/python/unit/scripts/test_process.py +++ b/tests/python/unit/scripts/test_process.py @@ -274,7 +274,7 @@ async def test_process_steps_logged( assert result == f"processed {jurisdiction_fp}" assert_message_was_logged( - "Using the following processing steps:", log_level="INFO" + "Using the following processing step(s):", log_level="INFO" ) assert_message_was_logged(" -> ".join(expected_steps), log_level="INFO") From 7c2a3d967bc7cb48c8d8ac771729a1a3d2d2d963 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 12:24:59 -0700 Subject: [PATCH 20/29] Minor update to regex --- tests/python/unit/test_version.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/python/unit/test_version.py b/tests/python/unit/test_version.py index e24f8c8b..2e01ebd6 100644 --- a/tests/python/unit/test_version.py +++ b/tests/python/unit/test_version.py @@ -13,7 +13,9 @@ import compass -SEMVER_DEV_PATTERN = re.compile(r"^\d+\.\d+\.\d+(?:\.dev\d+\+g[0-9a-f]+)?$") +SEMVER_DEV_PATTERN = re.compile( + r"^\d+\.\d+\.\d+(?:\.dev\d+\+g[0-9A-Fa-f]+(?:\.d\d{8})?)?$" +) def test_version_string_present(): From 241de09d55ab382864c3b4241034ef4b4e271f98 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 12:37:16 -0700 Subject: [PATCH 21/29] Fix test for windows --- tests/python/unit/scripts/test_process.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/python/unit/scripts/test_process.py b/tests/python/unit/scripts/test_process.py index 7bf201b1..e9411c94 100644 --- a/tests/python/unit/scripts/test_process.py +++ b/tests/python/unit/scripts/test_process.py @@ -166,9 +166,9 @@ async def test_process_args_logged_at_debug_to_file( jurisdiction_fp.touch() result = await process_jurisdictions_with_openai( - out_dir=str(out_dir), + out_dir=out_dir, tech="solar", - jurisdiction_fp=str(jurisdiction_fp), + jurisdiction_fp=jurisdiction_fp, log_level="DEBUG", ) @@ -179,10 +179,10 @@ async def test_process_args_logged_at_debug_to_file( log_level="DEBUG_TO_FILE", ) assert_message_was_logged('"out_dir": ', log_level="DEBUG_TO_FILE") - assert_message_was_logged(str(out_dir), log_level="DEBUG_TO_FILE") + assert_message_was_logged("outputs", log_level="DEBUG_TO_FILE") assert_message_was_logged('"tech": "solar"', log_level="DEBUG_TO_FILE") assert_message_was_logged('"jurisdiction_fp": ', log_level="DEBUG_TO_FILE") - assert_message_was_logged(str(jurisdiction_fp), log_level="DEBUG_TO_FILE") + assert_message_was_logged("jurisdictions.csv", log_level="DEBUG_TO_FILE") assert_message_was_logged( '"log_level": "DEBUG"', log_level="DEBUG_TO_FILE" ) From adae1d2b75dda95818ee522710e3897c42a7907b Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 12:38:11 -0700 Subject: [PATCH 22/29] Remove unnecessary function call --- tests/python/unit/utilities/test_utilities_logs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/python/unit/utilities/test_utilities_logs.py b/tests/python/unit/utilities/test_utilities_logs.py index b5112c1b..c76daccf 100644 --- a/tests/python/unit/utilities/test_utilities_logs.py +++ b/tests/python/unit/utilities/test_utilities_logs.py @@ -18,7 +18,6 @@ LogListener, log_versions, NoLocationFilter, - setup_logging_levels, _JsonExceptionFileHandler, _JsonFormatter, _LocalProcessQueueHandler, @@ -598,7 +597,6 @@ def test_json_exception_file_handler_multiple_exceptions(tmp_path): def test_setup_logging_levels(): """Test setup_logging_levels adds custom logging levels""" - setup_logging_levels() assert hasattr(logging, "TRACE") assert logging.TRACE == 5 From 160fa7450d31246b0fbb7b02c3764f83195ae49e Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 12:39:27 -0700 Subject: [PATCH 23/29] Fix grammar --- compass/scripts/process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index e452adf9..5d04ffbf 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -918,7 +918,7 @@ async def _run(self): if self.known_doc_urls: logger.debug( - "Checking known URL's for jurisdiction: %s", + "Checking known URLs for jurisdiction: %s", self.jurisdiction.full_name, ) doc = await self._try_find_ordinances( From e2f703bc1df8149f63f414b33a4254080b6a7675 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 13:01:41 -0700 Subject: [PATCH 24/29] Add a few more tests --- .../unit/utilities/test_utilities_logs.py | 6 +++ .../unit/utilities/test_utilities_parsing.py | 44 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tests/python/unit/utilities/test_utilities_logs.py b/tests/python/unit/utilities/test_utilities_logs.py index c76daccf..fadfccc5 100644 --- a/tests/python/unit/utilities/test_utilities_logs.py +++ b/tests/python/unit/utilities/test_utilities_logs.py @@ -21,6 +21,7 @@ _JsonExceptionFileHandler, _JsonFormatter, _LocalProcessQueueHandler, + _get_version, LOGGING_QUEUE, ) @@ -713,5 +714,10 @@ def emit(self, record): assert len(logger.handlers) == 0 +def test_get_dne_package(): + """Test _get_version for a non-existent package""" + assert _get_version("DNE") == "not installed" + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/utilities/test_utilities_parsing.py b/tests/python/unit/utilities/test_utilities_parsing.py index e505515f..6a1e4b4d 100644 --- a/tests/python/unit/utilities/test_utilities_parsing.py +++ b/tests/python/unit/utilities/test_utilities_parsing.py @@ -10,6 +10,7 @@ from compass.utilities.parsing import ( clean_backticks_from_llm_response, + convert_paths_to_strings, extract_ord_year_from_doc_attrs, llm_response_as_json, load_config, @@ -272,5 +273,48 @@ def test_load_config_invalid_extension(tmp_path): load_config(config_file) +def test_convert_paths_to_strings_all_structures(): + """Test `convert_paths_to_strings` across nested containers""" + + input_obj = { + Path("path_key"): { + "list": [ + Path("inner_list_item"), + {Path("dict_key"): Path("dict_value")}, + ], + "tuple": (Path("inner_tuple_item"), "preserve"), + "set": {Path("inner_set_item"), "inner_literal"}, + }, + "list": [Path("top_list_item"), (Path("tuple_in_list"),)], + "tuple": (Path("top_tuple_item"), {Path("tuple_set_item")}), + "set": { + Path("top_set_item"), + ("nested_tuple", Path("nested_tuple_path")), + }, + "value": "literal", + "path_value": Path("top_value_path"), + } + + result = convert_paths_to_strings(input_obj) + + expected = { + "path_key": { + "list": [ + "inner_list_item", + {"dict_key": "dict_value"}, + ], + "tuple": ("inner_tuple_item", "preserve"), + "set": {"inner_set_item", "inner_literal"}, + }, + "list": ["top_list_item", ("tuple_in_list",)], + "tuple": ("top_tuple_item", {"tuple_set_item"}), + "set": {"top_set_item", ("nested_tuple", "nested_tuple_path")}, + "value": "literal", + "path_value": "top_value_path", + } + + assert result == expected + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From 511d408a77fc4ab25a894043ad7313e4f63a9f97 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 21:25:46 -0700 Subject: [PATCH 25/29] Log the format used in file --- compass/scripts/process.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index 5d04ffbf..efc9c69f 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -1409,12 +1409,12 @@ def _setup_main_logging(log_dir, level, listener, keep_async_logs): if keep_async_logs: handler = logging.FileHandler(log_dir / "all.log", encoding="utf-8") - fmt = logging.Formatter( - fmt="[%(asctime)s] %(levelname)s - %(taskName)s: %(message)s", - ) + log_fmt = "[%(asctime)s] %(levelname)s - %(taskName)s: %(message)s" + fmt = logging.Formatter(fmt=log_fmt) handler.setFormatter(fmt) handler.setLevel(level) listener.addHandler(handler) + logger.debug_to_file("Using async log format: %s", log_fmt) def _log_exec_info(called_args, steps): From 8f7204370e1793153b44f4d50550238e30f74c40 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 21:48:28 -0700 Subject: [PATCH 26/29] Exception type now in error message --- compass/exceptions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compass/exceptions.py b/compass/exceptions.py index 2042b1dd..e2c3f6a4 100644 --- a/compass/exceptions.py +++ b/compass/exceptions.py @@ -13,7 +13,9 @@ def __init__(self, *args, **kwargs): """Init exception and broadcast message to logger""" super().__init__(*args, **kwargs) if args: - logger.error(str(args[0]), stacklevel=2) + logger.error( + "<%s> %s", self.__class__.__name__, args[0], stacklevel=2 + ) class COMPASSNotInitializedError(COMPASSError): From 1bfe00bec14d278ffe6b16bfc6811d1e5dbf425f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 21:49:35 -0700 Subject: [PATCH 27/29] Update tests --- tests/python/unit/test_exceptions.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/python/unit/test_exceptions.py b/tests/python/unit/test_exceptions.py index e7c8ba78..574ff140 100644 --- a/tests/python/unit/test_exceptions.py +++ b/tests/python/unit/test_exceptions.py @@ -12,6 +12,7 @@ COMPASSError, COMPASSValueError, COMPASSNotInitializedError, + COMPASSRuntimeError, ) @@ -33,6 +34,7 @@ def test_exceptions_log_error(caplog, assert_message_was_logged): except COMPASSError: pass + assert_message_was_logged("COMPASSError", "ERROR") assert_message_was_logged(BASIC_ERROR_MESSAGE, "ERROR") @@ -42,6 +44,7 @@ def test_exceptions_log_uncaught_error(assert_message_was_logged): with pytest.raises(COMPASSError): raise COMPASSError(BASIC_ERROR_MESSAGE) + assert_message_was_logged("COMPASSError", "ERROR") assert_message_was_logged(BASIC_ERROR_MESSAGE, "ERROR") @@ -56,6 +59,10 @@ def test_exceptions_log_uncaught_error(assert_message_was_logged): COMPASSValueError, [COMPASSError, ValueError, COMPASSValueError], ), + ( + COMPASSRuntimeError, + [COMPASSError, RuntimeError, COMPASSRuntimeError], + ), ], ) def test_catching_error_by_type( @@ -67,6 +74,7 @@ def test_catching_error_by_type( raise raise_type(BASIC_ERROR_MESSAGE) assert BASIC_ERROR_MESSAGE in str(exc_info.value) + assert_message_was_logged(raise_type.__name__, "ERROR") assert_message_was_logged(BASIC_ERROR_MESSAGE, "ERROR") From 7aa8f7e543d661bb9644c5254231c7927f48705f Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 21:50:33 -0700 Subject: [PATCH 28/29] Warning class also included in message now --- compass/warn.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/compass/warn.py b/compass/warn.py index 89976aab..4694cdfc 100644 --- a/compass/warn.py +++ b/compass/warn.py @@ -13,4 +13,6 @@ def __init__(self, *args, **kwargs): """Init exception and broadcast message to logger.""" super().__init__(*args, **kwargs) if args: - logger.warning(str(args[0]), stacklevel=2) + logger.warning( + "<%s> %s", self.__class__.__name__, args[0], stacklevel=2 + ) From 1b377d3bfc2db1a5420a95879c38fc8b706834b0 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Wed, 3 Dec 2025 09:47:48 -0700 Subject: [PATCH 29/29] Add trace logger call --- compass/utilities/parsing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index 4b5da44c..e4280cfe 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -240,6 +240,7 @@ def load_config(config_fp): def convert_paths_to_strings(obj): """[NOT PUBLIC API] Convert all Path instances to strings""" + logger.trace("Converting paths to strings in object: %s", obj) if isinstance(obj, Path): return str(obj) if isinstance(obj, dict):