Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
023cb5d
Update copilot instructions
ppinchuk Nov 19, 2025
7d4820a
Add missing line
ppinchuk Nov 19, 2025
634b08e
Add logging tests
ppinchuk Nov 19, 2025
e3289a3
Adjust main script to catch and log errors as early as possible
ppinchuk Nov 19, 2025
423e25e
Add `log_versions` function
ppinchuk Nov 20, 2025
e711e61
Use new logging function
ppinchuk Nov 20, 2025
b3764e5
Add test for `log_versions`
ppinchuk Nov 20, 2025
7c65d03
Add test for logging parameters to function
ppinchuk Nov 20, 2025
d76191e
Add conversion for `Path` params
ppinchuk Nov 20, 2025
e9cbcfb
Add log about processing steps + test
ppinchuk Nov 20, 2025
4ee1654
Add test for error being thrown
ppinchuk Nov 20, 2025
73bd2b0
Add new func
ppinchuk Nov 20, 2025
c713e1a
Use new utility func
ppinchuk Nov 20, 2025
6417565
Log start and end of processing
ppinchuk Nov 20, 2025
27fbf38
Configure docs to ignore funcs with docstrings that start with `[NOT …
ppinchuk Nov 20, 2025
a8aca8c
Adjust test to cover all permutations
ppinchuk Nov 20, 2025
ac14a65
Document new filter functionality
ppinchuk Nov 20, 2025
b2ba6d8
Log each processing step
ppinchuk Nov 20, 2025
b73cd9f
Minor style update
ppinchuk Nov 20, 2025
7c2a3d9
Minor update to regex
ppinchuk Nov 20, 2025
a3f073a
Merge remote-tracking branch 'origin/main' into pp/logging
ppinchuk Nov 20, 2025
241de09
Fix test for windows
ppinchuk Nov 20, 2025
adae1d2
Remove unnecessary function call
ppinchuk Nov 20, 2025
160fa74
Fix grammar
ppinchuk Nov 20, 2025
e2f703b
Add a few more tests
ppinchuk Nov 20, 2025
b881cf8
Merge remote-tracking branch 'origin/main' into pp/logging
ppinchuk Nov 21, 2025
511d408
Log the format used in file
ppinchuk Nov 21, 2025
8f72043
Exception type now in error message
ppinchuk Nov 21, 2025
1bfe00b
Update tests
ppinchuk Nov 21, 2025
7aa8f7e
Warning class also included in message now
ppinchuk Nov 21, 2025
1b377d3
Add trace logger call
ppinchuk Dec 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/copilot-instructions.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ Use `pixi add --feature python-dev <package>` to add a dependency that is only u
- Do not document parameters in the class docstring - do that in the __init__ docstring instead.
- All @property and @cached_property method documentation should be one line long and should start with the return type.
- "Protected" functions and methods should always be documented using only one-line summary docstrings.
- To exclude functions or classes from the public API documentation, start the docstring with the token ``[NOT PUBLIC API]``.

## 7. Coding Guidelines (Rust)
- Workspace-managed deps; update root `Cargo.toml` if adding shared dependency.
Expand All @@ -102,6 +103,14 @@ Use `pixi add --feature python-dev <package>` to add a dependency that is only u
- Unit tests target granular modules (`tests/python/unit/...`). Add new tests adjacent to similar domain (e.g., new utility → `tests/python/unit/utilities/`).
- Integration tests at `tests/python/integration` cover full pipelines.
- Coverage thresholds enforced (`--cov-fail-under=30` for unit suite). Keep defensive code minimal; exclude per coverage config if necessary.
- All Python test files (e.g. ``test_scenario.py``) should end with the following block of code:

  .. code-block:: python

      if __name__ == "__main__":
          pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"])

  This allows the (single) file to be executed, running only the tests contained within, which is extremely useful when updating/modifying/adding tests in the file.
- Rust tests live in crate `src` using standard Cargo conventions; prefer small, deterministic tests.

## 9. Logging & Observability
Expand Down
4 changes: 2 additions & 2 deletions compass/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Ordinance document download and structured data extraction"""

from ._version import __version__
from .utilities.logs import _setup_logging_levels, COMPASS_DEBUG_LEVEL
from .utilities.logs import setup_logging_levels, COMPASS_DEBUG_LEVEL

_setup_logging_levels()
setup_logging_levels()
4 changes: 3 additions & 1 deletion compass/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ def __init__(self, *args, **kwargs):
"""Init exception and broadcast message to logger"""
super().__init__(*args, **kwargs)
if args:
logger.error(str(args[0]), stacklevel=2)
logger.error(
"<%s> %s", self.__class__.__name__, args[0], stacklevel=2
)


class COMPASSNotInitializedError(COMPASSError):
Expand Down
156 changes: 119 additions & 37 deletions compass/scripts/process.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Ordinance full processing logic"""

import time
import json
import asyncio
import logging
from copy import deepcopy
Expand All @@ -11,7 +12,6 @@
import pandas as pd
from elm.web.utilities import get_redirected_url

from compass import __version__
from compass.scripts.download import (
find_jurisdiction_website,
download_known_urls,
Expand All @@ -21,7 +21,7 @@
download_jurisdiction_ordinances_from_website_compass_crawl,
filter_ordinance_docs,
)
from compass.exceptions import COMPASSValueError
from compass.exceptions import COMPASSValueError, COMPASSError
from compass.extraction import (
extract_ordinance_values,
extract_ordinance_text_with_ngram_validation,
Expand Down Expand Up @@ -101,9 +101,10 @@
LocationFileLog,
LogListener,
NoLocationFilter,
log_versions,
)
from compass.utilities.base import WebSearchParams
from compass.utilities.parsing import load_config
from compass.utilities.parsing import load_config, convert_paths_to_strings
from compass.pb import COMPASS_PB


Expand Down Expand Up @@ -445,6 +446,7 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913
and may include color-coded cost information if the terminal
supports it.
"""
called_args = locals()
if log_level == "DEBUG":
log_level = "DEBUG_TO_FILE"

Expand All @@ -457,38 +459,51 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913
ofd=ordinance_file_dir,
jdd=jurisdiction_dbs_dir,
)
pk = ProcessKwargs(
known_local_docs,
known_doc_urls,
file_loader_kwargs,
td_kwargs,
tpe_kwargs,
ppe_kwargs,
max_num_concurrent_jurisdictions,
)
wsp = WebSearchParams(
num_urls_to_check_per_jurisdiction,
max_num_concurrent_browsers,
max_num_concurrent_website_searches,
url_ignore_substrings,
pytesseract_exe_fp,
search_engines,
)
models = _initialize_model_params(model)
runner = _COMPASSRunner(
dirs=dirs,
log_listener=log_listener,
tech=tech,
models=models,
web_search_params=wsp,
process_kwargs=pk,
perform_se_search=perform_se_search,
perform_website_search=perform_website_search,
log_level=log_level,
)
async with log_listener as ll:
_setup_main_logging(dirs.logs, log_level, ll, keep_async_logs)
return await runner.run(jurisdiction_fp)
steps = _check_enabled_steps(
known_local_docs=known_local_docs,
known_doc_urls=known_doc_urls,
perform_se_search=perform_se_search,
perform_website_search=perform_website_search,
)
_log_exec_info(called_args, steps)
try:
pk = ProcessKwargs(
known_local_docs,
known_doc_urls,
file_loader_kwargs,
td_kwargs,
tpe_kwargs,
ppe_kwargs,
max_num_concurrent_jurisdictions,
)
wsp = WebSearchParams(
num_urls_to_check_per_jurisdiction,
max_num_concurrent_browsers,
max_num_concurrent_website_searches,
url_ignore_substrings,
pytesseract_exe_fp,
search_engines,
)
models = _initialize_model_params(model)
runner = _COMPASSRunner(
dirs=dirs,
log_listener=log_listener,
tech=tech,
models=models,
web_search_params=wsp,
process_kwargs=pk,
perform_se_search=perform_se_search,
perform_website_search=perform_website_search,
log_level=log_level,
)
return await runner.run(jurisdiction_fp)
except COMPASSError:
raise
except Exception:
logger.exception("Fatal error during processing")
raise


class _COMPASSRunner:
Expand Down Expand Up @@ -670,7 +685,6 @@ async def run(self, jurisdiction_fp):
terminal and may include color-coded cost information if
the terminal supports it.
"""
logger.info("Running COMPASS version %s", __version__)
jurisdictions = _load_jurisdictions_to_process(jurisdiction_fp)

num_jurisdictions = len(jurisdictions)
Expand Down Expand Up @@ -871,40 +885,65 @@ async def run(self):
"""Download and parse document for a single jurisdiction"""
start_time = time.monotonic()
doc = None
logger.info(
"Kicking off processing for jurisdiction: %s",
self.jurisdiction.full_name,
)
try:
doc = await self._run()
finally:
await self._record_usage()
await _record_jurisdiction_info(
self.jurisdiction, doc, start_time, self.usage_tracker
)
logger.info(
"Completed processing for jurisdiction: %s",
self.jurisdiction.full_name,
)

return doc

async def _run(self):
"""Search for docs and parse them for ordinances"""
if self.known_local_docs:
logger.debug(
"Checking local docs for jurisdiction: %s",
self.jurisdiction.full_name,
)
doc = await self._try_find_ordinances(
method=self._load_known_local_documents,
)
if doc is not None:
return doc

if self.known_doc_urls:
logger.debug(
"Checking known URLs for jurisdiction: %s",
self.jurisdiction.full_name,
)
doc = await self._try_find_ordinances(
method=self._download_known_url_documents,
)
if doc is not None:
return doc

if self.perform_se_search:
logger.debug(
"Collecting documents using a search engine for "
"jurisdiction: %s",
self.jurisdiction.full_name,
)
doc = await self._try_find_ordinances(
method=self._find_documents_using_search_engine,
)
if doc is not None:
return doc

if self.perform_website_search:
logger.debug(
"Collecting documents from the jurisdiction website for: %s",
self.jurisdiction.full_name,
)
doc = await self._try_find_ordinances(
method=self._find_documents_from_website,
)
Expand Down Expand Up @@ -1370,12 +1409,55 @@ def _setup_main_logging(log_dir, level, listener, keep_async_logs):

if keep_async_logs:
handler = logging.FileHandler(log_dir / "all.log", encoding="utf-8")
fmt = logging.Formatter(
fmt="[%(asctime)s] %(levelname)s - %(taskName)s: %(message)s",
)
log_fmt = "[%(asctime)s] %(levelname)s - %(taskName)s: %(message)s"
fmt = logging.Formatter(fmt=log_fmt)
handler.setFormatter(fmt)
handler.setLevel(level)
listener.addHandler(handler)
logger.debug_to_file("Using async log format: %s", log_fmt)


def _log_exec_info(called_args, steps):
    """Log versions, enabled processing steps, and call parameters"""
    log_versions(logger)

    logger.info(
        "Using the following processing step(s):\n\t%s", " -> ".join(steps)
    )

    normalized_args = convert_paths_to_strings(called_args)
    # This dump is purely informational, so it must never abort the run:
    # ``default=str`` makes ``json`` fall back to ``str()`` for values it
    # cannot encode natively (e.g. sets, which the path conversion above
    # returns as sets) instead of raising ``TypeError``.
    logger.debug_to_file(
        "Called 'process_jurisdictions_with_openai' with:\n%s",
        json.dumps(normalized_args, indent=4, default=str),
    )


def _check_enabled_steps(
known_local_docs=None,
known_doc_urls=None,
perform_se_search=True,
perform_website_search=True,
):
"""Check that at least one processing step is enabled"""
steps = []
if known_local_docs:
steps.append("Check local document")
if known_doc_urls:
steps.append("Check known document URL")
if perform_se_search:
steps.append("Look for document using search engine")
if perform_website_search:
steps.append("Look for document on jurisdiction website")

if not steps:
msg = (
"No processing steps enabled! Please provide at least one of "
"'known_local_docs', 'known_doc_urls', or set at least one of "
"'perform_se_search' or 'perform_website_search' to True."
)
raise COMPASSValueError(msg)

return steps


def _setup_folders(out_dir, log_dir=None, clean_dir=None, ofd=None, jdd=None):
Expand Down
44 changes: 42 additions & 2 deletions compass/utilities/logs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
from queue import SimpleQueue
from functools import partial, partialmethod
from logging.handlers import QueueHandler, QueueListener
from importlib.metadata import version, PackageNotFoundError

from compass import __version__
from compass.exceptions import COMPASSValueError


Expand Down Expand Up @@ -478,8 +480,38 @@ def _get_existing_records(self):
return records


def _setup_logging_levels():
"""Setup COMPASS logging levels"""
def log_versions(logger):
    """Log COMPASS and dependency package versions

    The COMPASS version is emitted with ``logger.info``; the version of
    each tracked dependency is emitted with ``logger.debug_to_file``.
    Dependencies that cannot be found are reported as "not installed".

    Parameters
    ----------
    logger : logging.Logger
        Logger object to log the version messages to. It must provide
        the ``debug_to_file`` method in addition to the standard
        logging methods.
    """
    logger.info("Running COMPASS version %s", __version__)

    tracked_dependencies = (
        "NREL-ELM",
        "openai",
        "playwright",
        "tf-playwright-stealth",
        "rebrowser-playwright",
        "camoufox",
        "pdftotext",
        "pytesseract",
        "langchain-text-splitters",
        "crawl4ai",
        "nltk",
        "networkx",
        "pandas",
        "numpy",
    )
    for dependency in tracked_dependencies:
        installed = _get_version(dependency)
        logger.debug_to_file("- %s version: %s", dependency, installed)


def setup_logging_levels():
"""[NOT PUBLIC API] Setup COMPASS logging levels"""
logging.TRACE = 5
logging.addLevelName(logging.TRACE, "TRACE")
logging.Logger.trace = partialmethod(logging.Logger.log, logging.TRACE)
Expand All @@ -491,3 +523,11 @@ def _setup_logging_levels():
logging.Logger.log, logging.DEBUG_TO_FILE
)
logging.debug_to_file = partial(logging.log, logging.DEBUG_TO_FILE)


def _get_version(pkg_name):
"""Get the version string for a package"""
try:
return version(pkg_name)
except PackageNotFoundError:
return "not installed"
19 changes: 19 additions & 0 deletions compass/utilities/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,22 @@ def load_config(config_fp):
f"{config_fp.suffix}. Supported extensions are .json5 and .json."
)
raise COMPASSValueError(msg)


def convert_paths_to_strings(obj):
    """[NOT PUBLIC API] Recursively replace Path objects with strings

    Dictionaries (keys and values), lists, tuples, and sets are rebuilt
    with every contained ``Path`` converted via ``str``. Any other
    object is returned unchanged.

    Parameters
    ----------
    obj : object
        Object to convert. May be a ``Path``, a (possibly nested)
        ``dict``, ``list``, ``tuple``, or ``set``, or any other value.

    Returns
    -------
    object
        ``str`` if `obj` is a ``Path``; a new ``dict``, ``list``,
        ``tuple``, or ``set`` for those container types; otherwise
        `obj` itself.
    """
    logger.trace("Converting paths to strings in object: %s", obj)
    if isinstance(obj, Path):
        return str(obj)

    if isinstance(obj, dict):
        converted = {}
        for key, value in obj.items():
            # Convert the key before the value so nested calls (and
            # their trace messages) happen in key-then-value order
            new_key = convert_paths_to_strings(key)
            converted[new_key] = convert_paths_to_strings(value)
        return converted

    # Checked in this order; the result is always the plain built-in
    # container type, even for subclasses
    for container_type in (list, tuple, set):
        if isinstance(obj, container_type):
            return container_type(
                convert_paths_to_strings(item) for item in obj
            )

    return obj
4 changes: 3 additions & 1 deletion compass/warn.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,6 @@ def __init__(self, *args, **kwargs):
"""Init exception and broadcast message to logger."""
super().__init__(*args, **kwargs)
if args:
logger.warning(str(args[0]), stacklevel=2)
logger.warning(
"<%s> %s", self.__class__.__name__, args[0], stacklevel=2
)
Loading