diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index b3777f3e..fd35f7ad 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -92,6 +92,7 @@ Use `pixi add --feature python-dev ` to add a dependency that is only u - Do not document parameters in the class docstring - do that in the __init__ docstring instead. - All @property and @cached_property method documentation should be one line long and should start with the return type. - "Protected" functions and methods should always be documented using only one-line summary docstrings. +- To exclude functions or classes from the public API documentation, start the docstring with the token ``[NOT PUBLIC API]``. ## 7. Coding Guidelines (Rust) - Workspace-managed deps; update root `Cargo.toml` if adding shared dependency. @@ -102,6 +103,14 @@ Use `pixi add --feature python-dev ` to add a dependency that is only u - Unit tests target granular modules (`tests/python/unit/...`). Add new tests adjacent to similar domain (e.g., new utility → `tests/python/unit/utilities/`). - Integration tests at `tests/python/integration` cover full pipelines. - Coverage thresholds enforced (`--cov-fail-under=30` for unit suite). Keep defensive code minimal; exclude per coverage config if necessary. +- All Python test files (e.g. ``test_scenario.py``) should end with the following block of code: + + .. code-block:: python + + if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) + + This allows the file to be executed on its own, running only the tests it contains, which is extremely useful when updating or adding tests in the file. - Rust tests live in crate `src` using standard Cargo conventions; prefer small, deterministic tests. ## 9.
Logging & Observability diff --git a/compass/__init__.py b/compass/__init__.py index 4f75f231..28cb2282 100644 --- a/compass/__init__.py +++ b/compass/__init__.py @@ -1,6 +1,6 @@ """Ordinance document download and structured data extraction""" from ._version import __version__ -from .utilities.logs import _setup_logging_levels, COMPASS_DEBUG_LEVEL +from .utilities.logs import setup_logging_levels, COMPASS_DEBUG_LEVEL -_setup_logging_levels() +setup_logging_levels() diff --git a/compass/exceptions.py b/compass/exceptions.py index 2042b1dd..e2c3f6a4 100644 --- a/compass/exceptions.py +++ b/compass/exceptions.py @@ -13,7 +13,9 @@ def __init__(self, *args, **kwargs): """Init exception and broadcast message to logger""" super().__init__(*args, **kwargs) if args: - logger.error(str(args[0]), stacklevel=2) + logger.error( + "<%s> %s", self.__class__.__name__, args[0], stacklevel=2 + ) class COMPASSNotInitializedError(COMPASSError): diff --git a/compass/scripts/process.py b/compass/scripts/process.py index c6dd6eba..efc9c69f 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -1,6 +1,7 @@ """Ordinance full processing logic""" import time +import json import asyncio import logging from copy import deepcopy @@ -11,7 +12,6 @@ import pandas as pd from elm.web.utilities import get_redirected_url -from compass import __version__ from compass.scripts.download import ( find_jurisdiction_website, download_known_urls, @@ -21,7 +21,7 @@ download_jurisdiction_ordinances_from_website_compass_crawl, filter_ordinance_docs, ) -from compass.exceptions import COMPASSValueError +from compass.exceptions import COMPASSValueError, COMPASSError from compass.extraction import ( extract_ordinance_values, extract_ordinance_text_with_ngram_validation, @@ -101,9 +101,10 @@ LocationFileLog, LogListener, NoLocationFilter, + log_versions, ) from compass.utilities.base import WebSearchParams -from compass.utilities.parsing import load_config +from compass.utilities.parsing import load_config, convert_paths_to_strings from compass.pb import COMPASS_PB @@ -445,6 +446,7 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 and may include color-coded cost information if the terminal supports it. 
""" + called_args = locals() if log_level == "DEBUG": log_level = "DEBUG_TO_FILE" @@ -457,38 +459,51 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 ofd=ordinance_file_dir, jdd=jurisdiction_dbs_dir, ) - pk = ProcessKwargs( - known_local_docs, - known_doc_urls, - file_loader_kwargs, - td_kwargs, - tpe_kwargs, - ppe_kwargs, - max_num_concurrent_jurisdictions, - ) - wsp = WebSearchParams( - num_urls_to_check_per_jurisdiction, - max_num_concurrent_browsers, - max_num_concurrent_website_searches, - url_ignore_substrings, - pytesseract_exe_fp, - search_engines, - ) - models = _initialize_model_params(model) - runner = _COMPASSRunner( - dirs=dirs, - log_listener=log_listener, - tech=tech, - models=models, - web_search_params=wsp, - process_kwargs=pk, - perform_se_search=perform_se_search, - perform_website_search=perform_website_search, - log_level=log_level, - ) async with log_listener as ll: _setup_main_logging(dirs.logs, log_level, ll, keep_async_logs) - return await runner.run(jurisdiction_fp) + steps = _check_enabled_steps( + known_local_docs=known_local_docs, + known_doc_urls=known_doc_urls, + perform_se_search=perform_se_search, + perform_website_search=perform_website_search, + ) + _log_exec_info(called_args, steps) + try: + pk = ProcessKwargs( + known_local_docs, + known_doc_urls, + file_loader_kwargs, + td_kwargs, + tpe_kwargs, + ppe_kwargs, + max_num_concurrent_jurisdictions, + ) + wsp = WebSearchParams( + num_urls_to_check_per_jurisdiction, + max_num_concurrent_browsers, + max_num_concurrent_website_searches, + url_ignore_substrings, + pytesseract_exe_fp, + search_engines, + ) + models = _initialize_model_params(model) + runner = _COMPASSRunner( + dirs=dirs, + log_listener=log_listener, + tech=tech, + models=models, + web_search_params=wsp, + process_kwargs=pk, + perform_se_search=perform_se_search, + perform_website_search=perform_website_search, + log_level=log_level, + ) + return await runner.run(jurisdiction_fp) + except COMPASSError: + raise + except Exception: + logger.exception("Fatal error during processing") + raise class _COMPASSRunner: @@ -670,7 +685,6 @@ async def run(self, jurisdiction_fp): terminal and may include color-coded cost information if the terminal supports it. 
""" - logger.info("Running COMPASS version %s", __version__) jurisdictions = _load_jurisdictions_to_process(jurisdiction_fp) num_jurisdictions = len(jurisdictions) @@ -871,6 +885,10 @@ async def run(self): """Download and parse document for a single jurisdiction""" start_time = time.monotonic() doc = None + logger.info( + "Kicking off processing for jurisdiction: %s", + self.jurisdiction.full_name, + ) try: doc = await self._run() finally: @@ -878,12 +896,20 @@ async def run(self): await _record_jurisdiction_info( self.jurisdiction, doc, start_time, self.usage_tracker ) + logger.info( + "Completed processing for jurisdiction: %s", + self.jurisdiction.full_name, + ) return doc async def _run(self): """Search for docs and parse them for ordinances""" if self.known_local_docs: + logger.debug( + "Checking local docs for jurisdiction: %s", + self.jurisdiction.full_name, + ) doc = await self._try_find_ordinances( method=self._load_known_local_documents, ) @@ -891,6 +917,10 @@ async def _run(self): return doc if self.known_doc_urls: + logger.debug( + "Checking known URLs for jurisdiction: %s", + self.jurisdiction.full_name, + ) doc = await self._try_find_ordinances( method=self._download_known_url_documents, ) @@ -898,6 +928,11 @@ async def _run(self): return doc if self.perform_se_search: + logger.debug( + "Collecting documents using a search engine for " + "jurisdiction: %s", + self.jurisdiction.full_name, + ) doc = await self._try_find_ordinances( method=self._find_documents_using_search_engine, ) @@ -905,6 +940,10 @@ async def _run(self): return doc if self.perform_website_search: + logger.debug( + "Collecting documents from the jurisdiction website for: %s", + self.jurisdiction.full_name, + ) doc = await self._try_find_ordinances( method=self._find_documents_from_website, ) @@ -1370,12 +1409,55 @@ def _setup_main_logging(log_dir, level, listener, keep_async_logs): if keep_async_logs: handler = logging.FileHandler(log_dir / "all.log", encoding="utf-8") - fmt = logging.Formatter( - fmt="[%(asctime)s] %(levelname)s - %(taskName)s: %(message)s", - ) + log_fmt = "[%(asctime)s] %(levelname)s - %(taskName)s: %(message)s" + fmt = logging.Formatter(fmt=log_fmt) handler.setFormatter(fmt) handler.setLevel(level) listener.addHandler(handler) + logger.debug_to_file("Using async log format: %s", log_fmt) + + +def _log_exec_info(called_args, steps): + """Log versions and function parameters to file""" + log_versions(logger) + + logger.info( + "Using the following processing step(s):\n\t%s", " -> ".join(steps) + ) + + normalized_args = convert_paths_to_strings(called_args) + logger.debug_to_file( + "Called 'process_jurisdictions_with_openai' with:\n%s", + json.dumps(normalized_args, indent=4), + ) + + +def _check_enabled_steps( + known_local_docs=None, + known_doc_urls=None, + perform_se_search=True, + perform_website_search=True, +): + """Check that at least one processing step is enabled""" + steps = [] + if known_local_docs: + steps.append("Check local document") + if known_doc_urls: + steps.append("Check known document URL") + if perform_se_search: + steps.append("Look for document using search engine") + if perform_website_search: + steps.append("Look for document on jurisdiction website") + + if not steps: + msg = ( + "No processing steps enabled! Please provide at least one of " + "'known_local_docs', 'known_doc_urls', or set at least one of " + "'perform_se_search' or 'perform_website_search' to True." 
+ ) + raise COMPASSValueError(msg) + + return steps def _setup_folders(out_dir, log_dir=None, clean_dir=None, ofd=None, jdd=None): diff --git a/compass/utilities/logs.py b/compass/utilities/logs.py index e7dd6da7..21ac84c7 100644 --- a/compass/utilities/logs.py +++ b/compass/utilities/logs.py @@ -14,7 +14,9 @@ from queue import SimpleQueue from functools import partial, partialmethod from logging.handlers import QueueHandler, QueueListener +from importlib.metadata import version, PackageNotFoundError +from compass import __version__ from compass.exceptions import COMPASSValueError @@ -478,8 +480,38 @@ def _get_existing_records(self): return records -def _setup_logging_levels(): - """Setup COMPASS logging levels""" +def log_versions(logger): + """Log COMPASS and dependency package versions + + Parameters + ---------- + logger : logging.Logger + Logger object to log version messages to. + """ + + logger.info("Running COMPASS version %s", __version__) + packages_to_log = [ + "NREL-ELM", + "openai", + "playwright", + "tf-playwright-stealth", + "rebrowser-playwright", + "camoufox", + "pdftotext", + "pytesseract", + "langchain-text-splitters", + "crawl4ai", + "nltk", + "networkx", + "pandas", + "numpy", + ] + for pkg in packages_to_log: + logger.debug_to_file("- %s version: %s", pkg, _get_version(pkg)) + + +def setup_logging_levels(): + """[NOT PUBLIC API] Setup COMPASS logging levels""" logging.TRACE = 5 logging.addLevelName(logging.TRACE, "TRACE") logging.Logger.trace = partialmethod(logging.Logger.log, logging.TRACE) @@ -491,3 +523,11 @@ def _setup_logging_levels(): logging.Logger.log, logging.DEBUG_TO_FILE ) logging.debug_to_file = partial(logging.log, logging.DEBUG_TO_FILE) + + +def _get_version(pkg_name): + """Get the version string for a package""" + try: + return version(pkg_name) + except PackageNotFoundError: + return "not installed" diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index 1bb4284a..e4280cfe 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -236,3 +236,22 @@ def load_config(config_fp): f"{config_fp.suffix}. Supported extensions are .json5 and .json."
) raise COMPASSValueError(msg) + + +def convert_paths_to_strings(obj): + """[NOT PUBLIC API] Convert all Path instances to strings""" + logger.trace("Converting paths to strings in object: %s", obj) + if isinstance(obj, Path): + return str(obj) + if isinstance(obj, dict): + return { + convert_paths_to_strings(key): convert_paths_to_strings(value) + for key, value in obj.items() + } + if isinstance(obj, list): + return [convert_paths_to_strings(item) for item in obj] + if isinstance(obj, tuple): + return tuple(convert_paths_to_strings(item) for item in obj) + if isinstance(obj, set): + return {convert_paths_to_strings(item) for item in obj} + return obj diff --git a/compass/warn.py b/compass/warn.py index 89976aab..4694cdfc 100644 --- a/compass/warn.py +++ b/compass/warn.py @@ -13,4 +13,6 @@ def __init__(self, *args, **kwargs): """Init exception and broadcast message to logger.""" super().__init__(*args, **kwargs) if args: - logger.warning(str(args[0]), stacklevel=2) + logger.warning( + "<%s> %s", self.__class__.__name__, args[0], stacklevel=2 + ) diff --git a/docs/source/conf.py b/docs/source/conf.py index 2f962fe2..fdcb32bb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -209,26 +209,8 @@ ] -def skip_external_methods(app, what, name, obj, skip, options): - if name in { - "clear", - "pop", - "popitem", - "setdefault", - "update", - } and "MutableMapping" in str(obj): - return True - - if name in {"copy", "fromkeys"} and "UsageTracker" in str(obj): - return True - - if name in {"items", "keys", "values"} and "Mapping" in str(obj): - return True - - if name in {"copy", "get"} and "UserDict" in str(obj): - return True - - if name in { +def _skip_pydantic_methods(name, obj): + return name in { "model_dump_json", "model_json_schema", "model_dump", @@ -254,14 +236,44 @@ def skip_external_methods(app, what, name, obj, skip, options): "schema_json", "update_forward_refs", "validate", - } and "BaseModel" in str(obj): + } and "BaseModel" in str(obj) + + +def _skip_builtin_methods(name, obj): + if name in { + "clear", + "pop", + "popitem", + "setdefault", + "update", + } and "MutableMapping" in str(obj): + return True + + if name in {"items", "keys", "values"} and "Mapping" in str(obj): return True + return name in {"copy", "get"} and "UserDict" in str(obj) + + +def _skip_internal_api(name, obj): + if (getattr(obj, "__doc__", None) or "").startswith("[NOT PUBLIC API]"): + return True + + return name in {"copy", "fromkeys"} and "UsageTracker" in str(obj) + + +def _skip_member(app, what, name, obj, skip, options): + if ( + _skip_internal_api(name, obj) + or _skip_builtin_methods(name, obj) + or _skip_pydantic_methods(name, obj) + ): + return True return None def setup(app): - app.connect("autodoc-skip-member", skip_external_methods) + app.connect("autodoc-skip-member", _skip_member) # -- Extension configuration ------------------------------------------------- diff --git a/docs/source/dev/README.rst b/docs/source/dev/README.rst index 4b93c441..fa047306 100644 --- a/docs/source/dev/README.rst +++ b/docs/source/dev/README.rst @@ -177,7 +177,10 @@ As such, please adhere to these guidelines: Such code is subject to change at any time, so you should never rely on private/protected functionality unless you know what you are doing (in which case you should be relying on the function's code, not docstring). -5) Link any functions and/or classes that you reference in your docstring. 
+5) If you want to create a function or method that is meant to be used across the repository but + **not** meant to be included in the public API (i.e. excluded from the autogenerated Sphinx + documentation), start the docstring with the token ``[NOT PUBLIC API]``. +6) Link any functions and/or classes that you reference in your docstring. Sphinx allows interlinks between different sets of documentation, which can be a really convenient way for new users to learn more about the external libraries they are expected to use. For more information on how to set up links in your documentation, please see diff --git a/tests/python/unit/scripts/test_process.py b/tests/python/unit/scripts/test_process.py new file mode 100644 index 00000000..e9411c94 --- /dev/null +++ b/tests/python/unit/scripts/test_process.py @@ -0,0 +1,283 @@ +"""Tests for compass.scripts.process""" + +import logging +from pathlib import Path +from itertools import product + +import pytest + +from compass.exceptions import COMPASSValueError +import compass.scripts.process as process_module +from compass.scripts.process import ( + _COMPASSRunner, + process_jurisdictions_with_openai, +) +from compass.utilities import ProcessKwargs + + +@pytest.fixture +def testing_log_file(tmp_path): + """Log file capturing compass logger output during tests""" + log_fp = tmp_path / "test.log" + handler = logging.FileHandler(log_fp, encoding="utf-8") + logger = logging.getLogger("compass") + prev_level = logger.level + prev_propagate = logger.propagate + logger.setLevel(logging.ERROR) + logger.propagate = False + logger.addHandler(handler) + + yield log_fp + + handler.flush() + logger.removeHandler(handler) + handler.close() + logger.setLevel(prev_level) + logger.propagate = prev_propagate + + +@pytest.fixture +def patched_runner(monkeypatch): + """Patch the COMPASSRunner to a dummy that bypasses processing""" + + class DummyRunner: + """Minimal runner that bypasses full processing""" + + def __init__(self, **_): + pass + + async def run(self, jurisdiction_fp): + return f"processed {jurisdiction_fp}" + + monkeypatch.setattr(process_module, "_COMPASSRunner", DummyRunner) + + +def test_known_local_docs_missing_file(tmp_path): + """Raise when known_local_docs points to missing config""" + missing_fp = tmp_path / "does_not_exist.json" + runner = _COMPASSRunner( + dirs=None, + log_listener=None, + tech="solar", + models={}, + process_kwargs=ProcessKwargs(str(missing_fp), None), + ) + + with pytest.raises(COMPASSValueError, match="Config file does not exist"): + _ = runner.known_local_docs + + +def test_known_local_docs_logs_missing_file(tmp_path, testing_log_file): + """Log missing known_local_docs config to error file""" + + missing_fp = tmp_path / "does_not_exist.json" + runner = _COMPASSRunner( + dirs=None, + log_listener=None, + tech="solar", + models={}, + process_kwargs=ProcessKwargs(str(missing_fp), None), + ) + + with pytest.raises(COMPASSValueError, match="Config file does not exist"): + _ = runner.known_local_docs + + assert testing_log_file.exists() + assert "Config file does not exist" in testing_log_file.read_text( + encoding="utf-8" + ) + + +@pytest.mark.asyncio +async def test_duplicate_tasks_logs_to_file(tmp_path): + """Log duplicate LLM tasks to error file""" + + jurisdiction_fp = tmp_path / "jurisdictions.csv" + jurisdiction_fp.touch() + + with pytest.raises(COMPASSValueError, match="Found duplicated task"): + _ = await process_jurisdictions_with_openai( + out_dir=tmp_path / "outputs", + tech="solar", + jurisdiction_fp=jurisdiction_fp, + model=[ + { + "name":
"gpt-4.1-mini", + "tasks": ["default", "date_extraction"], + }, + { + "name": "gpt-4.1", + "tasks": [ + "ordinance_text_extraction", + "permitted_use_text_extraction", + "date_extraction", + ], + }, + ], + ) + + log_files = list((tmp_path / "outputs" / "logs").glob("*")) + assert len(log_files) == 1 + assert "Fatal error during processing" not in log_files[0].read_text( + encoding="utf-8" + ) + assert "Found duplicated task" in log_files[0].read_text(encoding="utf-8") + + +@pytest.mark.asyncio +async def test_external_exceptions_logged_to_file(tmp_path, monkeypatch): + """Log external exceptions to error file""" + + def _always_fail(*__, **___): + raise NotImplementedError("Simulated external error") + + monkeypatch.setattr( + process_module, "_initialize_model_params", _always_fail + ) + + jurisdiction_fp = tmp_path / "jurisdictions.csv" + jurisdiction_fp.touch() + + with pytest.raises(NotImplementedError, match="Simulated external error"): + _ = await process_jurisdictions_with_openai( + out_dir=tmp_path / "outputs", + tech="solar", + jurisdiction_fp=jurisdiction_fp, + ) + + log_files = list((tmp_path / "outputs" / "logs").glob("*")) + assert len(log_files) == 1 + assert "Fatal error during processing" in log_files[0].read_text( + encoding="utf-8" + ) + assert "Simulated external error" in log_files[0].read_text( + encoding="utf-8" + ) + + +@pytest.mark.asyncio +async def test_process_args_logged_at_debug_to_file( + tmp_path, patched_runner, assert_message_was_logged +): + """Log function arguments with DEBUG_TO_FILE level""" + + out_dir = tmp_path / "outputs" + jurisdiction_fp = tmp_path / "jurisdictions.csv" + jurisdiction_fp.touch() + + result = await process_jurisdictions_with_openai( + out_dir=out_dir, + tech="solar", + jurisdiction_fp=jurisdiction_fp, + log_level="DEBUG", + ) + + assert result == f"processed {jurisdiction_fp}" + + assert_message_was_logged( + "Called 'process_jurisdictions_with_openai' with:", + log_level="DEBUG_TO_FILE", + ) + assert_message_was_logged('"out_dir": ', log_level="DEBUG_TO_FILE") + assert_message_was_logged("outputs", log_level="DEBUG_TO_FILE") + assert_message_was_logged('"tech": "solar"', log_level="DEBUG_TO_FILE") + assert_message_was_logged('"jurisdiction_fp": ', log_level="DEBUG_TO_FILE") + assert_message_was_logged("jurisdictions.csv", log_level="DEBUG_TO_FILE") + assert_message_was_logged( + '"log_level": "DEBUG"', log_level="DEBUG_TO_FILE" + ) + assert_message_was_logged( + '"model": "gpt-4o-mini"', log_level="DEBUG_TO_FILE" + ) + assert_message_was_logged( + '"keep_async_logs": false', log_level="DEBUG_TO_FILE" + ) + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ( + "has_known_local_docs", + "has_known_doc_urls", + "perform_se_search", + "perform_website_search", + ), + [ + pytest.param( + *flags, id=("local-{}_urls-{}_se-{}_web-{}".format(*flags)) + ) + for flags in product([False, True], repeat=4) + ], +) +async def test_process_steps_logged( + tmp_path, + patched_runner, + assert_message_was_logged, + has_known_local_docs, + has_known_doc_urls, + perform_se_search, + perform_website_search, +): + """Log enabled processing steps for every combination of inputs""" + + out_dir = tmp_path / "outputs" + jurisdiction_fp = tmp_path / "jurisdictions.csv" + jurisdiction_fp.touch() + + known_local_docs = None + if has_known_local_docs: + known_local_docs = {"1": [{"source_fp": tmp_path / "local_doc.pdf"}]} + + known_doc_urls = None + if has_known_doc_urls: + known_doc_urls = { + "1": [{"source": "https://example.com/ordinance.pdf"}] + } + + 
expected_steps = [] + if has_known_local_docs: + expected_steps.append("Check local document") + if has_known_doc_urls: + expected_steps.append("Check known document URL") + if perform_se_search: + expected_steps.append("Look for document using search engine") + if perform_website_search: + expected_steps.append("Look for document on jurisdiction website") + + if not expected_steps: + with pytest.raises( + COMPASSValueError, match="No processing steps enabled" + ): + await process_jurisdictions_with_openai( + out_dir=str(out_dir), + tech="solar", + jurisdiction_fp=str(jurisdiction_fp), + log_level="DEBUG", + known_local_docs=known_local_docs, + known_doc_urls=known_doc_urls, + perform_se_search=perform_se_search, + perform_website_search=perform_website_search, + ) + return + + result = await process_jurisdictions_with_openai( + out_dir=str(out_dir), + tech="solar", + jurisdiction_fp=str(jurisdiction_fp), + log_level="DEBUG", + known_local_docs=known_local_docs, + known_doc_urls=known_doc_urls, + perform_se_search=perform_se_search, + perform_website_search=perform_website_search, + ) + + assert result == f"processed {jurisdiction_fp}" + + assert_message_was_logged( + "Using the following processing step(s):", log_level="INFO" + ) + assert_message_was_logged(" -> ".join(expected_steps), log_level="INFO") + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/test_exceptions.py b/tests/python/unit/test_exceptions.py index e7c8ba78..574ff140 100644 --- a/tests/python/unit/test_exceptions.py +++ b/tests/python/unit/test_exceptions.py @@ -12,6 +12,7 @@ COMPASSError, COMPASSValueError, COMPASSNotInitializedError, + COMPASSRuntimeError, ) @@ -33,6 +34,7 @@ def test_exceptions_log_error(caplog, assert_message_was_logged): except COMPASSError: pass + assert_message_was_logged("COMPASSError", "ERROR") assert_message_was_logged(BASIC_ERROR_MESSAGE, "ERROR") @@ -42,6 +44,7 @@ def test_exceptions_log_uncaught_error(assert_message_was_logged): with pytest.raises(COMPASSError): raise COMPASSError(BASIC_ERROR_MESSAGE) + assert_message_was_logged("COMPASSError", "ERROR") assert_message_was_logged(BASIC_ERROR_MESSAGE, "ERROR") @@ -56,6 +59,10 @@ def test_exceptions_log_uncaught_error(assert_message_was_logged): COMPASSValueError, [COMPASSError, ValueError, COMPASSValueError], ), + ( + COMPASSRuntimeError, + [COMPASSError, RuntimeError, COMPASSRuntimeError], + ), ], ) def test_catching_error_by_type( @@ -67,6 +74,7 @@ def test_catching_error_by_type( raise raise_type(BASIC_ERROR_MESSAGE) assert BASIC_ERROR_MESSAGE in str(exc_info.value) + assert_message_was_logged(raise_type.__name__, "ERROR") assert_message_was_logged(BASIC_ERROR_MESSAGE, "ERROR") diff --git a/tests/python/unit/test_pb.py b/tests/python/unit/test_pb.py index af0ed08a..8986b0a7 100644 --- a/tests/python/unit/test_pb.py +++ b/tests/python/unit/test_pb.py @@ -1,7 +1,8 @@ """Tests for `compass.pb` progress bar helpers""" -from contextlib import ExitStack from io import StringIO +from pathlib import Path +from contextlib import ExitStack import pytest from rich.console import Console @@ -499,3 +500,7 @@ async def test_compass_website_crawl_prog_bar_duplicate( def test_singleton_instance_accessible(console): """Expose singleton progress bar instance""" assert isinstance(compass.pb.COMPASS_PB, compass.pb._COMPASSProgressBars) + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/test_version.py 
b/tests/python/unit/test_version.py index 5d0f57d0..2e01ebd6 100644 --- a/tests/python/unit/test_version.py +++ b/tests/python/unit/test_version.py @@ -6,11 +6,16 @@ """ import re +from pathlib import Path + +import pytest import compass -SEMVER_DEV_PATTERN = re.compile(r"^\d+\.\d+\.\d+(?:\.dev\d+\+g[0-9a-f]+)?$") +SEMVER_DEV_PATTERN = re.compile( + r"^\d+\.\d+\.\d+(?:\.dev\d+\+g[0-9A-Fa-f]+(?:\.d\d{8})?)?$" +) def test_version_string_present(): @@ -28,3 +33,7 @@ def test_version_semantic_shape(): ) assert v != "9999", "Version set to placeholder" assert not v.startswith("10000"), "Version set to placeholder" + + +if __name__ == "__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/utilities/test_utilities_logs.py b/tests/python/unit/utilities/test_utilities_logs.py index 23defee7..fadfccc5 100644 --- a/tests/python/unit/utilities/test_utilities_logs.py +++ b/tests/python/unit/utilities/test_utilities_logs.py @@ -16,11 +16,12 @@ LocationFileLog, LocationFilter, LogListener, + log_versions, NoLocationFilter, _JsonExceptionFileHandler, _JsonFormatter, _LocalProcessQueueHandler, - _setup_logging_levels, + _get_version, LOGGING_QUEUE, ) @@ -37,6 +38,18 @@ def _speed_up_location_file_log_async_exit(): LocationFileLog.ASYNC_EXIT_SLEEP_SECONDS = original_sleep +@pytest.fixture(scope="module") +def compass_logger(): + """Provide compass logger with DEBUG_TO_FILE level for tests""" + logger = logging.getLogger("compass") + prev_level = logger.level + logger.setLevel("DEBUG_TO_FILE") + try: + yield logger + finally: + logger.setLevel(prev_level) + + class _DummyListener: def __init__(self): self.added_handlers = [] @@ -584,8 +597,7 @@ def test_json_exception_file_handler_multiple_exceptions(tmp_path): def test_setup_logging_levels(): - """Test _setup_logging_levels adds custom logging levels""" - _setup_logging_levels() + """Test setup_logging_levels adds custom logging levels""" assert hasattr(logging, "TRACE") assert logging.TRACE == 5 @@ -623,6 +635,34 @@ def test_local_process_queue_handler_emit(): assert queued_record.msg == "test message" +def test_log_versions_logs_expected_packages( + compass_logger, assert_message_was_logged +): + """Test log_versions emits entries for each tracked package""" + + log_versions(compass_logger) + + expected_packages = [ + "NREL-ELM", + "openai", + "playwright", + "tf-playwright-stealth", + "rebrowser-playwright", + "camoufox", + "pdftotext", + "pytesseract", + "langchain-text-splitters", + "crawl4ai", + "nltk", + "networkx", + "pandas", + "numpy", + ] + assert_message_was_logged("Running COMPASS version", log_level="INFO") + for pkg in expected_packages: + assert_message_was_logged(pkg, log_level="DEBUG_TO_FILE") + + def test_log_listener_context_manager(): """Test LogListener as a context manager""" logger_name = "test_listener_logger" @@ -674,5 +714,10 @@ def emit(self, record): assert len(logger.handlers) == 0 +def test_get_dne_package(): + """Test _get_version for a non-existent package""" + assert _get_version("DNE") == "not installed" + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/utilities/test_utilities_parsing.py b/tests/python/unit/utilities/test_utilities_parsing.py index e505515f..6a1e4b4d 100644 --- a/tests/python/unit/utilities/test_utilities_parsing.py +++ b/tests/python/unit/utilities/test_utilities_parsing.py @@ -10,6 +10,7 @@ from compass.utilities.parsing import ( clean_backticks_from_llm_response, + 
convert_paths_to_strings, extract_ord_year_from_doc_attrs, llm_response_as_json, load_config, @@ -272,5 +273,48 @@ def test_load_config_invalid_extension(tmp_path): load_config(config_file) +def test_convert_paths_to_strings_all_structures(): + """Test `convert_paths_to_strings` across nested containers""" + + input_obj = { + Path("path_key"): { + "list": [ + Path("inner_list_item"), + {Path("dict_key"): Path("dict_value")}, + ], + "tuple": (Path("inner_tuple_item"), "preserve"), + "set": {Path("inner_set_item"), "inner_literal"}, + }, + "list": [Path("top_list_item"), (Path("tuple_in_list"),)], + "tuple": (Path("top_tuple_item"), {Path("tuple_set_item")}), + "set": { + Path("top_set_item"), + ("nested_tuple", Path("nested_tuple_path")), + }, + "value": "literal", + "path_value": Path("top_value_path"), + } + + result = convert_paths_to_strings(input_obj) + + expected = { + "path_key": { + "list": [ + "inner_list_item", + {"dict_key": "dict_value"}, + ], + "tuple": ("inner_tuple_item", "preserve"), + "set": {"inner_set_item", "inner_literal"}, + }, + "list": ["top_list_item", ("tuple_in_list",)], + "tuple": ("top_tuple_item", {"tuple_set_item"}), + "set": {"top_set_item", ("nested_tuple", "nested_tuple_path")}, + "value": "literal", + "path_value": "top_value_path", + } + + assert result == expected + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"])
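A note on why ``_log_exec_info`` routes the captured ``locals()`` through ``convert_paths_to_strings`` before the ``json.dumps`` call: the standard-library encoder rejects ``pathlib.Path`` values outright, so the call arguments must be normalized to plain strings first. A minimal standalone sketch of the failure mode and the fix (illustrative only, not part of the patch):

.. code-block:: python

    import json
    from pathlib import Path

    from compass.utilities.parsing import convert_paths_to_strings

    called_args = {"out_dir": Path("outputs"), "tech": "solar"}

    # Raw Path values are not JSON serializable...
    try:
        json.dumps(called_args)
    except TypeError as err:
        print(err)  # Object of type PosixPath is not JSON serializable

    # ...so every Path (including dict keys and values nested in lists,
    # tuples, and sets) is converted to a plain string before dumping
    print(json.dumps(convert_paths_to_strings(called_args), indent=4))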