From f9170fe424c5fa281f24c98e7cf0bd51c07aed5e Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 14:02:06 -0700 Subject: [PATCH 1/4] Docstring updates --- compass/_cli/main.py | 2 +- compass/common/base.py | 70 +++++++++- compass/common/tree.py | 24 ++-- compass/exceptions.py | 1 - compass/extraction/apply.py | 45 ++++++- compass/pb.py | 90 +++++++------ compass/scripts/download.py | 74 ++++++++--- compass/scripts/process.py | 67 +++++----- compass/services/base.py | 13 +- compass/services/cpu.py | 24 ++-- compass/services/threaded.py | 24 +++- compass/services/usage.py | 9 +- compass/utilities/base.py | 48 +++++-- compass/utilities/enums.py | 32 ++++- compass/utilities/finalize.py | 130 +++++++++++-------- compass/utilities/io.py | 34 +++-- compass/utilities/jurisdictions.py | 42 ++++-- compass/utilities/location.py | 26 +++- compass/utilities/nt.py | 51 ++++++++ compass/utilities/parsing.py | 79 ++++++++---- compass/validation/content.py | 54 ++++---- compass/validation/graphs.py | 60 ++++++--- compass/validation/location.py | 201 ++++++++++++++++++++--------- compass/warn.py | 1 - 24 files changed, 828 insertions(+), 373 deletions(-) diff --git a/compass/_cli/main.py b/compass/_cli/main.py index 1bd608e58..c9e72023d 100644 --- a/compass/_cli/main.py +++ b/compass/_cli/main.py @@ -11,7 +11,7 @@ @click.version_option(version=__version__) @click.pass_context def main(ctx): - """Ordinance command line interface""" + """COMPASS command line interface""" ctx.ensure_object(dict) diff --git a/compass/common/base.py b/compass/common/base.py index 75954004b..e7df1a974 100644 --- a/compass/common/base.py +++ b/compass/common/base.py @@ -128,7 +128,29 @@ def llm_response_does_not_start_with_no(response): def setup_async_decision_tree( graph_setup_func, usage_sub_label=None, **kwargs ): - """Setup Async Decision tree for ordinance extraction""" + """Setup an ``AsyncDecisionTree`` for ordinance extraction + + Parameters + ---------- + graph_setup_func : callable + Factory that returns a fully configured + :class:`networkx.DiGraph`. + usage_sub_label : str, optional + Optional usage label reported to the LLM usage tracker. + **kwargs + Keyword arguments forwarded to ``graph_setup_func``. + + Returns + ------- + AsyncDecisionTree + Decision tree wrapping the graph produced by + ``graph_setup_func``. + + Notes + ----- + The function asserts that the tree has recorded at least the system + prompt before returning the constructed wrapper. + """ G = graph_setup_func(**kwargs) # noqa: N806 tree = AsyncDecisionTree(G, usage_sub_label=usage_sub_label) assert len(tree.chat_llm_caller.messages) == 1 @@ -136,7 +158,22 @@ def setup_async_decision_tree( async def run_async_tree(tree, response_as_json=True): - """Run Async Decision Tree and return output as dict""" + """Run an async decision tree and optionally parse JSON output + + Parameters + ---------- + tree : AsyncDecisionTree + Decision tree to execute. + response_as_json : bool, optional + When ``True`` (default), attempts to parse the LLM response as + JSON using :func:`compass.utilities.llm_response_as_json`. + + Returns + ------- + dict or str or None + Parsed dictionary when ``response_as_json`` is ``True``, raw + response otherwise. Returns ``None`` if execution fails. 
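+
+    Examples
+    --------
+    A minimal usage sketch, assuming ``tree`` is an already-configured
+    :class:`AsyncDecisionTree` (for instance, one returned by
+    :func:`setup_async_decision_tree`)::
+
+        import asyncio
+
+        # May be an empty dict if the final response is not valid JSON
+        result = asyncio.run(run_async_tree(tree, response_as_json=True))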
+ """ try: response = await tree.async_run() except COMPASSRuntimeError: @@ -149,14 +186,39 @@ async def run_async_tree(tree, response_as_json=True): async def run_async_tree_with_bm(tree, base_messages): - """Run Async Decision Tree from base messages; return dict output""" + """Run an async decision tree using seed "base" messages + + Parameters + ---------- + tree : AsyncDecisionTree + Decision tree to execute. + base_messages : list of dict + Messages to preload into the tree's chat caller before running. + + Returns + ------- + dict or str or None + Output from :func:`run_async_tree`, filtered by the + ``response_as_json`` default. + """ tree.chat_llm_caller.messages = base_messages assert len(tree.chat_llm_caller.messages) == len(base_messages) return await run_async_tree(tree) def empty_output(feature): - """Empty output for a feature (not found in text)""" + """Return the default empty result for a missing feature + + Parameters + ---------- + feature : str + Name of the feature to seed in the empty output structure. + + Returns + ------- + list of dict + Empty result placeholders used by downstream extraction logic. + """ if feature in {"structures", "property line"}: return [ {"feature": f"{feature} (participating)"}, diff --git a/compass/common/tree.py b/compass/common/tree.py index 2d948cb45..05bf036a9 100644 --- a/compass/common/tree.py +++ b/compass/common/tree.py @@ -66,32 +66,22 @@ def __init__(self, graph, usage_sub_label=None): @property def chat_llm_caller(self): - """ChatLLMCaller: ChatLLMCaller instance for this tree""" + """ChatLLMCaller: LLM caller bound to the decision tree""" return self.graph.graph["chat_llm_caller"] @cached_property def tree_name(self): - """str: Name of the decision tree""" + """str: Configured decision tree name""" return self._g.graph.get("_d_tree_name", "Unknown decision tree") @property def messages(self): - """Get a list of the conversation messages with the LLM - - Returns - ------- - list - """ + """list: Conversation messages exchanged with the LLM""" return self.chat_llm_caller.messages @property def all_messages_txt(self): - """Get a printout of the full conversation with the LLM - - Returns - ------- - str - """ + """str: Formatted conversation transcript""" messages = [ f"{msg['role'].upper()}: {msg['content']}" for msg in self.messages ] @@ -140,6 +130,12 @@ async def async_run(self, node0="init"): out : str or None Final response from LLM at the leaf node or ``None`` if an ``AttributeError`` was raised during execution. + + Raises + ------ + compass.exceptions.COMPASSRuntimeError + Raised when the traversal encounters an unexpected + exception that is not an ``AttributeError``. """ self._history = [] diff --git a/compass/exceptions.py b/compass/exceptions.py index 2042b1dd5..53de495e3 100644 --- a/compass/exceptions.py +++ b/compass/exceptions.py @@ -10,7 +10,6 @@ class COMPASSError(Exception): """Generic COMPASS Error""" def __init__(self, *args, **kwargs): - """Init exception and broadcast message to logger""" super().__init__(*args, **kwargs) if args: logger.error(str(args[0]), stacklevel=2) diff --git a/compass/extraction/apply.py b/compass/extraction/apply.py index c36bed35e..590a371b1 100644 --- a/compass/extraction/apply.py +++ b/compass/extraction/apply.py @@ -37,13 +37,20 @@ async def check_for_ordinance_info( has the ``"contains_ord_info"`` key, it will not be processed. To force a document to be processed by this function, remove that key from the documents ``attrs``. 
+ model_config : compass.llm.config.LLMConfig + Configuration describing which LLM service, splitter, and call + parameters should be used for extraction. + heuristic : object + Domain-specific heuristic implementing a ``check`` method to + qualify text chunks for further processing. tech : str Technology of interest (e.g. "solar", "wind", etc). This is used to set up some document validation decision trees. - text_splitter : LCTextSplitter, optional - Optional Langchain text splitter (or subclass instance), or any - object that implements a `split_text` method. The method should - take text as input (str) and return a list of text chunks. + ordinance_text_collector_class : type + Collector class invoked to capture ordinance text chunks. + permitted_use_text_collector_class : type, optional + Collector class used to capture permitted-use districts text. + When ``None``, the permitted-use workflow is skipped. usage_tracker : UsageTracker, optional Optional tracker instance to monitor token usage during LLM calls. By default, ``None``. @@ -61,6 +68,12 @@ async def check_for_ordinance_info( and an ``"ordinance_text"`` key containing the ordinance text snippet. Note that the snippet may contain other info as well, but should encapsulate all of the ordinance text. + + Notes + ----- + The function updates progress bar logging as chunks are processed + and sets ``contains_district_info`` when + ``permitted_use_text_collector_class`` is provided. """ if "contains_ord_info" in doc.attrs: return doc @@ -134,6 +147,9 @@ async def extract_date(doc, model_config, usage_tracker=None): ---------- doc : elm.web.document.BaseDocument A document potentially containing date information. + model_config : compass.llm.config.LLMConfig + Configuration describing which LLM service, splitter, and call + parameters should be used for date extraction. usage_tracker : UsageTracker, optional Optional tracker instance to monitor token usage during LLM calls. By default, ``None``. @@ -145,6 +161,11 @@ async def extract_date(doc, model_config, usage_tracker=None): the parsing are stored in the documents attrs. In particular, the attrs will contain a ``"date"`` key that will contain the parsed date information. + + Notes + ----- + Documents already containing a ``"date"`` attribute are returned + without reprocessing. """ if "date" in doc.attrs: logger.debug( @@ -186,8 +207,9 @@ async def extract_ordinance_text_with_llm( Optional Langchain text splitter (or subclass instance), or any object that implements a `split_text` method. The method should take text as input (str) and return a list of text chunks. - extractor : WindOrdinanceTextExtractor - Object used for ordinance text extraction. + extractor : compass.extraction.base.OrdinanceTextExtractor + Extractor instance exposing ``parsers`` that consume text + chunks and update ``doc.attrs``. original_text_key : str String corresponding to the `doc.attrs` key containing the original text (before extraction). @@ -250,6 +272,9 @@ async def extract_ordinance_text_with_ngram_validation( Optional Langchain text splitter (or subclass instance), or any object that implements a `split_text` method. The method should take text as input (str) and return a list of text chunks. + extractor : compass.extraction.base.OrdinanceTextExtractor + Extractor instance exposing ``parsers`` that consume text + chunks and update ``doc.attrs``. original_text_key : str String corresponding to the `doc.attrs` key containing the original text (before extraction). 
@@ -406,6 +431,9 @@ async def extract_ordinance_values(doc, parser, text_key, out_key): that are found to contain ordinance data. Note that if the document's attrs does not contain the `text_key` key, it will not be processed. + parser : compass.extraction.base.StructuredParser + Parser instance with an async ``parse`` method that converts + cleaned ordinance text into structured values. text_key : str Name of the key under which cleaned text is stored in `doc.attrs`. This text should be ready for extraction. @@ -418,6 +446,11 @@ async def extract_ordinance_values(doc, parser, text_key, out_key): elm.web.document.BaseDocument Document that has been parsed for ordinance values. The results of the extraction are stored in the document's attrs. + + Notes + ----- + When the cleaned text is missing or empty the function emits a + :class:`compass.warn.COMPASSWarning` and leaves ``doc`` unchanged. """ if not doc.attrs.get(text_key): msg = ( diff --git a/compass/pb.py b/compass/pb.py index 740f6697b..146314a33 100644 --- a/compass/pb.py +++ b/compass/pb.py @@ -74,7 +74,19 @@ def render(self, task): # noqa: PLR6301 class _COMPASSProgressBars: - """COMPASS progress bar configurations""" + """Manage the suite of rich progress bars used by COMPASS runs + + The class maintains a primary progress bar plus a set of + jurisdiction-scoped progress bars for downloads, crawling, and + parsing subtasks. It centralizes creation, teardown, and cost + tracking so CLI runs can display consistent status updates. + + Notes + ----- + Instances are typically accessed via the module-level singleton + :data:`COMPASS_PB`. Use the context managers for scoped tasks to + ensure progress bars are removed even when exceptions occur. + """ def __init__(self, console=None): """ @@ -114,7 +126,7 @@ def __init__(self, console=None): @property def group(self): - """rich.console.Group: Group of renderable progress bars.""" + """rich.console.Group: Group of renderable progress bars""" return self._group def create_main_task(self, num_jurisdictions): @@ -148,15 +160,13 @@ def create_main_task(self, num_jurisdictions): ) def progress_main_task(self): - """Advance the main task one step - - In other words, mark one jurisdiction as complete. + """Advance the main jurisdiction task by one unit Raises ------ COMPASSNotInitializedError - If the main task has not been set up (i.e. - `create_main_task` has not been called). + If the main task has not been set up via + :meth:`create_main_task`. """ if self._main_task is None: msg = ( @@ -168,18 +178,16 @@ def progress_main_task(self): self._main.update(self._main_task, advance=1) def update_total_cost(self, cost, replace=False): - """Update the total cost of the run + """Update the aggregate LLM cost displayed in the main bar Parameters ---------- - cost : int or float - Cost value used for update. + cost : float + Cost increment or replacement value in US dollars. replace : bool, optional - If ``True``, the `cost` input will completely replace the - total cost, but only if the `cost` value is equal to or - larger than the existing total cost (i.e. we never want the - cost to decrease). If ``False``, the `cost` input is just - added to the running total. By default, ``False``. + When ``True`` the total cost is replaced by ``cost`` , + provided it does not move backwards. When ``False`` + the cost is added cumulatively. By default, ``False``. 
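+
+        Examples
+        --------
+        A minimal usage sketch, assuming the module-level singleton
+        ``COMPASS_PB`` has been set up for the current run::
+
+            COMPASS_PB.update_total_cost(0.42)  # add $0.42 to the total
+            # Replace the running total (ignored if it would decrease it)
+            COMPASS_PB.update_total_cost(1.50, replace=True)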
""" if replace: if cost + 0.01 >= self._total_cost: @@ -192,15 +200,15 @@ def update_total_cost(self, cost, replace=False): @contextmanager def jurisdiction_prog_bar(self, location, progress_main=True): - """Set a progress bar for the processing of one jurisdiction + """Context manager for jurisdiction-wide processing progress Parameters ---------- location : str Name of jurisdiction being processed. - progress_main : bool, default=True - Option to progress the main task when exiting this context - manager. + progress_main : bool, optional + If ``True``, the main jurisdiction task advances when the + context exits successfully. By default, ``True``. Yields ------ @@ -259,10 +267,10 @@ def update_jurisdiction_task(self, location, *args, **kwargs): @contextmanager def jurisdiction_sub_prog(self, location): - """Start a sub-progress update area for location + """Context manager for text-only jurisdiction sub-progress - This type of sub-progress does not have a bar, so it's useful - for tasks with an unknown length/duration. + This variant omits a progress bar and is intended for steps with + unknown durations, such as intermediate parsing tasks. Parameters ---------- @@ -296,7 +304,7 @@ def jurisdiction_sub_prog(self, location): @contextmanager def jurisdiction_sub_prog_bar(self, location): - """Start a sub-progress bar for location + """Context manager for jurisdiction sub-progress with a bar Parameters ---------- @@ -337,7 +345,7 @@ def jurisdiction_sub_prog_bar(self, location): @asynccontextmanager async def file_download_prog_bar(self, location, num_downloads): - """Display a progress bar for file downloads for a jurisdiction + """Async context manager for jurisdiction download progress Parameters ---------- @@ -368,7 +376,7 @@ async def file_download_prog_bar(self, location, num_downloads): ) def start_file_download_prog_bar(self, location, num_downloads): - """Setup a progress bar for download of files for a jurisdiction + """Create and register a download progress bar for a location Parameters ---------- @@ -377,10 +385,10 @@ def start_file_download_prog_bar(self, location, num_downloads): num_downloads : int Total number of downloads being processed. - Yields - ------ - rich.progress.Progress - `rich` progress bar initialized for this jurisdiction. + Returns + ------- + tuple + Two-item tuple of the progress instance and created task ID. Raises ------ @@ -417,7 +425,7 @@ def start_file_download_prog_bar(self, location, num_downloads): async def tear_down_file_download_prog_bar( self, location, num_downloads, pb, task ): - """Tear down the progress bar showing file downloads + """Complete and remove a file download progress bar Parameters ---------- @@ -437,7 +445,7 @@ async def tear_down_file_download_prog_bar( self._group.renderables.remove(pb) def update_download_task(self, location, *args, **kwargs): - """Update the task corresponding to the jurisdiction download + """Update a jurisdiction download progress entry Parameters ---------- @@ -452,14 +460,14 @@ def update_download_task(self, location, *args, **kwargs): @asynccontextmanager async def website_crawl_prog_bar(self, location, num_pages): - """Set a progress bar for crawling jurisdiction websites + """Async context manager for website crawling progress Parameters ---------- location : str Name of jurisdiction being processed. - num_downloads : int - Total number of downloads being processed. + num_pages : int + Total number of pages expected for the crawl. 
Yields ------ @@ -516,7 +524,7 @@ def _remove_website_crawl_prog_bar(self, location): self._group.renderables.remove(pb) def update_website_crawl_task(self, location, *args, **kwargs): - """Update task corresponding to the jurisdiction website crawl + """Update the website crawl progress for a jurisdiction Parameters ---------- @@ -530,7 +538,7 @@ def update_website_crawl_task(self, location, *args, **kwargs): self._wc_pbs[location].update(task_id, *args, **kwargs) def update_website_crawl_doc_found(self, location): - """Update task to say that one more document has been found + """Increment the count of documents discovered during crawling Parameters ---------- @@ -548,14 +556,14 @@ def update_website_crawl_doc_found(self, location): @asynccontextmanager async def compass_website_crawl_prog_bar(self, location, num_pages): - """Set a progress bar for compass-style crawling of websites + """Async context manager for COMPASS-style website crawling Parameters ---------- location : str Name of jurisdiction being processed. - num_downloads : int - Total number of downloads being processed. + num_pages : int + Total number of pages expected for the crawl. Yields ------ @@ -612,7 +620,7 @@ def _remove_compass_website_crawl_prog_bar(self, location): self._group.renderables.remove(pb) def update_compass_website_crawl_task(self, location, *args, **kwargs): - """Update task corresponding to the jurisdiction website crawl + """Update COMPASS-style crawl progress for a jurisdiction Parameters ---------- @@ -626,7 +634,7 @@ def update_compass_website_crawl_task(self, location, *args, **kwargs): self._cwc_pbs[location].update(task_id, *args, **kwargs) def update_compass_website_crawl_doc_found(self, location): - """Update task to say that one more document has been found + """Increment COMPASS-style crawl document discovery count Parameters ---------- diff --git a/compass/scripts/download.py b/compass/scripts/download.py index 5d1bcbfd6..c2910a07b 100644 --- a/compass/scripts/download.py +++ b/compass/scripts/download.py @@ -161,6 +161,11 @@ async def find_jurisdiction_website( ): """Search for the main landing page of a given jurisdiction + This function submits two pre-determined queries based on the + jurisdiction name, prioritizing official landing pages. Additional + ``kwargs`` (for example, alternate search engines) can be supplied + to fine-tune behavior. + Parameters ---------- jurisdiction : Jurisdiction @@ -188,6 +193,12 @@ async def find_jurisdiction_website( usage_tracker : UsageTracker, optional Optional tracker instance to monitor token usage during LLM calls. By default, ``None``. + url_ignore_substrings : list of str, optional + URL substrings that should be excluded from search results. + Substrings are applied case-insensitively. By default, ``None``. + **kwargs + Additional arguments forwarded to + :func:`elm.web.search.run.search_with_fallback`. Returns ------- @@ -251,6 +262,9 @@ async def download_jurisdiction_ordinances_from_website( ---------- website : str URL of the jurisdiction website to search. + heuristic : callable + Callable taking an :class:`elm.web.document.BaseDocument` and + returning ``True`` when the document should be kept. keyword_points : dict Dictionary of keyword points to use for scoring links. Keys are keywords, values are points to assign to links @@ -294,7 +308,7 @@ async def download_jurisdiction_ordinances_from_website( no ordinance document was found. results : list, optional List of crawl4ai results containing metadata about the crawled - pages. 
This is only returned if `return_c4ai_results` is + pages. Only returned when ``return_c4ai_results`` evaluates to ``True``. Notes @@ -376,6 +390,9 @@ async def download_jurisdiction_ordinances_from_website_compass_crawl( ---------- website : str URL of the jurisdiction website to search. + heuristic : callable + Callable taking an :class:`elm.web.document.BaseDocument` and + returning ``True`` when the document should be kept. keyword_points : dict Dictionary of keyword points to use for scoring links. Keys are keywords, values are points to assign to links @@ -387,7 +404,13 @@ async def download_jurisdiction_ordinances_from_website_compass_crawl( "pw_launch_kwargs" key in these will also be used to initialize the :class:`elm.web.search.google.PlaywrightGoogleLinkSearch` used for the Google URL search. By default, ``None``. - max_urls : int, optional + already_visited : set of str, optional + URLs that have already been crawled and should be skipped. + By default, ``None``. + num_link_scores_to_check_per_page : int, default=4 + Number of top-scoring links to visit per page. + By default, ``4``. + max_urls : int, default=100 Max number of URLs to check from the website before terminating the search. By default, ``100``. crawl_semaphore : :class:`asyncio.Semaphore`, optional @@ -471,12 +494,11 @@ async def download_jurisdiction_ordinance_using_search_engine( Parameters ---------- + question_templates : sequence of str + Query templates that will be formatted with the jurisdiction + name before submission to the search engine. jurisdiction : Jurisdiction Location objects representing the jurisdiction. - model_configs : dict - Dictionary of :class:`~compass.llm.config.LLMConfig` instances. - Should have at minium a "default" key that is used as a fallback - for all tasks. num_urls : int, optional Number of unique Google search result URL's to check for ordinance document. By default, ``5``. @@ -499,9 +521,14 @@ async def download_jurisdiction_ordinance_using_search_engine( playwright browsers used to download content from the web open concurrently. If ``None``, no limits are applied. By default, ``None``. - usage_tracker : UsageTracker, optional - Optional tracker instance to monitor token usage during - LLM calls. By default, ``None``. + url_ignore_substrings : list of str, optional + URL substrings that should be excluded from search results. + Substrings are applied case-insensitively. By default, ``None``. + **kwargs + Additional keyword arguments forwarded to + :func:`elm.web.search.run.web_search_links_as_docs`. Common + entries include ``usage_tracker`` for logging LLM usage and + extra Playwright configuration. Returns ------- @@ -571,17 +598,29 @@ async def filter_ordinance_docs( Parameters ---------- + docs : sequence of elm.web.document.BaseDocument + Documents to screen for ordinance content. jurisdiction : Jurisdiction Location objects representing the jurisdiction. model_configs : dict Dictionary of LLMConfig instances. Should have at minium a "default" key that is used as a fallback for all tasks. + heuristic : object + Domain-specific heuristic implementing a ``check`` method to + qualify ordinance content. tech : str Technology of interest (e.g. "solar", "wind", etc). This is used to set up some document validation decision trees. + ordinance_text_collector_class : type + Collector class used to extract ordinance text sections. + permitted_use_text_collector_class : type + Collector class used to extract permitted-use text sections. 
usage_tracker : UsageTracker, optional Optional tracker instance to monitor token usage during LLM calls. By default, ``None``. + check_for_correct_jurisdiction : bool, default=True + If ``True`` run jurisdiction validation before, content checks. + By default, ``True``. Returns ------- @@ -589,6 +628,11 @@ async def filter_ordinance_docs( List of :obj:`~elm.web.document.BaseDocument` instances possibly containing ordinance information, or ``None`` if no ordinance document was found. + + Notes + ----- + The function updates CLI progress bars to reflect each filtering + phase and returns documents sorted by quality heuristics. """ if check_for_correct_jurisdiction: COMPASS_PB.update_jurisdiction_task( @@ -656,7 +700,7 @@ async def _docs_from_web_search( on_search_complete_hook, **kwargs, ): - """Download docs from web using jurisdiction queries""" + """Download documents from the web using jurisdiction queries""" queries = [ question.format(jurisdiction=jurisdiction.full_name) for question in question_templates @@ -690,7 +734,7 @@ async def _docs_from_web_search( async def _down_select_docs_correct_jurisdiction( docs, jurisdiction, usage_tracker, model_config ): - """Remove all documents not pertaining to the jurisdiction""" + """Remove documents that do not match the target jurisdiction""" jurisdiction_validator = JurisdictionValidator( text_splitter=model_config.text_splitter, llm_service=model_config.llm_service, @@ -716,7 +760,7 @@ async def _down_select_docs_correct_content( permitted_use_text_collector_class, usage_tracker, ): - """Remove all documents that don't contain ordinance info""" + """Remove documents that do not contain ordinance information""" return await filter_documents( docs, validation_coroutine=_contains_ordinances, @@ -733,7 +777,7 @@ async def _down_select_docs_correct_content( async def _contains_ordinances( doc, model_configs, usage_tracker=None, **kwargs ): - """Helper coroutine that checks for ordinance and date info""" + """Determine whether a document contains ordinance information""" model_config = model_configs.get( LLMTasks.DOCUMENT_CONTENT_VALIDATION, model_configs[LLMTasks.DEFAULT], @@ -757,7 +801,7 @@ async def _contains_ordinances( def _sort_final_ord_docs(all_ord_docs): - """Sort the list of documents by year, type, and text length""" + """Sort ordinance documents by desirability heuristics""" if not all_ord_docs: return None @@ -765,7 +809,7 @@ def _sort_final_ord_docs(all_ord_docs): def _ord_doc_sorting_key(doc): - """Sorting key for documents. 
The higher this value, the better""" + """Compute a composite sorting score for ordinance documents""" latest_year, latest_month, latest_day = doc.attrs.get("date", (-1, -1, -1)) best_docs_from_website = doc.attrs.get(_SCORE_KEY, 0) prefer_pdf_files = isinstance(doc, PDFDocument) diff --git a/compass/scripts/process.py b/compass/scripts/process.py index c6dd6eba1..175bd684a 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -518,7 +518,7 @@ def __init__( @cached_property def browser_semaphore(self): - """asyncio.Semaphore or None: Sem to limit # of browsers""" + """asyncio.Semaphore or None: Browser concurrency limiter""" return ( asyncio.Semaphore( self.web_search_params.max_num_concurrent_browsers @@ -529,7 +529,7 @@ def browser_semaphore(self): @cached_property def crawl_semaphore(self): - """asyncio.Semaphore or None: Sem to limit # of crawls""" + """asyncio.Semaphore or None: Concurrency limiter for crawls""" return ( asyncio.Semaphore( self.web_search_params.max_num_concurrent_website_searches @@ -540,7 +540,7 @@ def crawl_semaphore(self): @cached_property def search_engine_semaphore(self): - """asyncio.Semaphore or None: Sem to limit # of SE queries""" + """asyncio.Semaphore: Concurrency limiter for search queries""" return asyncio.Semaphore(MAX_CONCURRENT_SEARCH_ENGINE_QUERIES) @cached_property @@ -556,14 +556,14 @@ def _jurisdiction_semaphore(self): @property def jurisdiction_semaphore(self): - """asyncio.Semaphore or AsyncExitStack: Jurisdictions limit""" + """asyncio.Semaphore or AsyncExitStack: Jurisdiction context""" if self._jurisdiction_semaphore is None: return AsyncExitStack() return self._jurisdiction_semaphore @cached_property def file_loader_kwargs(self): - """dict: Keyword arguments for `AsyncWebFileLoader`""" + """dict: Keyword arguments for ``AsyncWebFileLoader``""" file_loader_kwargs = _configure_file_loader_kwargs( self.process_kwargs.file_loader_kwargs ) @@ -576,7 +576,7 @@ def file_loader_kwargs(self): @cached_property def local_file_loader_kwargs(self): - """dict: Keyword arguments for `AsyncLocalFileLoader`""" + """dict: Keyword arguments for ``AsyncLocalFileLoader``""" file_loader_kwargs = { "pdf_read_coroutine": read_pdf_file, "pdf_read_kwargs": ( @@ -596,7 +596,7 @@ def local_file_loader_kwargs(self): @cached_property def known_local_docs(self): - """dict: Known filepaths by jurisdiction code""" + """dict: Known filepaths keyed by jurisdiction code""" known_local_docs = self.process_kwargs.known_local_docs or {} if isinstance(known_local_docs, str): known_local_docs = load_config(known_local_docs) @@ -604,7 +604,7 @@ def known_local_docs(self): @cached_property def known_doc_urls(self): - """dict: Known URL's keyed by jurisdiction code""" + """dict: Known URLs keyed by jurisdiction code""" known_doc_urls = self.process_kwargs.known_doc_urls or {} if isinstance(known_doc_urls, str): known_doc_urls = load_config(known_doc_urls) @@ -612,12 +612,12 @@ def known_doc_urls(self): @cached_property def tpe_kwargs(self): - """dict: Keyword arguments for `ThreadPoolExecutor`""" + """dict: Keyword arguments for ``ThreadPoolExecutor``""" return _configure_thread_pool_kwargs(self.process_kwargs.tpe_kwargs) @cached_property def _base_services(self): - """list: List of required services to run for processing""" + """list: Services required to support jurisdiction processing""" base_services = [ TempFileCachePB( td_kwargs=self.process_kwargs.td_kwargs, @@ -704,7 +704,7 @@ async def run(self, jurisdiction_fp): return run_msg async def _run_all(self, 
jurisdictions): - """Process all jurisdictions with running services""" + """Process all jurisdictions while required services run""" services = [model.llm_service for model in set(self.models.values())] services += self._base_services _ = self.file_loader_kwargs # init loader kwargs once @@ -743,7 +743,7 @@ async def _run_all(self, jurisdictions): async def _processed_jurisdiction_info_with_pb( self, jurisdiction, *args, **kwargs ): - """Process jurisdiction and update progress bar""" + """Process a jurisdiction while updating the progress bar""" async with self.jurisdiction_semaphore: with COMPASS_PB.jurisdiction_prog_bar(jurisdiction.full_name): return await self._processed_jurisdiction_info( @@ -751,7 +751,7 @@ async def _processed_jurisdiction_info_with_pb( ) async def _processed_jurisdiction_info(self, *args, **kwargs): - """Drop `doc` from RAM and only keep enough info to re-build""" + """Convert processed document to minimal metadata""" doc = await self._process_jurisdiction_with_logging(*args, **kwargs) @@ -771,7 +771,7 @@ async def _process_jurisdiction_with_logging( known_doc_urls=None, usage_tracker=None, ): - """Retrieve ordinance document with async logs""" + """Retrieve ordinance document with location-scoped logging""" async with LocationFileLog( self.log_listener, self.dirs.logs, @@ -868,7 +868,14 @@ def _tracked_progress(self): self._jsp = None async def run(self): - """Download and parse document for a single jurisdiction""" + """Download and parse ordinances for a single jurisdiction + + Returns + ------- + elm.web.document.BaseDocument or None + Document containing ordinance information, or ``None`` when + no valid ordinance content was identified. + """ start_time = time.monotonic() doc = None try: @@ -882,7 +889,7 @@ async def run(self): return doc async def _run(self): - """Search for docs and parse them for ordinances""" + """Search for documents and parse them for ordinances""" if self.known_local_docs: doc = await self._try_find_ordinances( method=self._load_known_local_documents, @@ -914,7 +921,7 @@ async def _run(self): return None async def _try_find_ordinances(self, method, *args, **kwargs): - """Try to find ordinances using specified method""" + """Execute a retrieval method and parse resulting documents""" docs = await method(*args, **kwargs) if docs is None: return None @@ -926,7 +933,7 @@ async def _try_find_ordinances(self, method, *args, **kwargs): return await self._parse_docs_for_ordinances(docs) async def _load_known_local_documents(self): - """Load local ordinance documents""" + """Load ordinance documents from known local file paths""" docs = await load_known_docs( self.jurisdiction, @@ -968,7 +975,7 @@ async def _load_known_local_documents(self): return docs async def _download_known_url_documents(self): - """Download ordinance documents from known URLs""" + """Download ordinance documents from pre-specified URLs""" docs = await download_known_urls( self.jurisdiction, @@ -1011,7 +1018,7 @@ async def _download_known_url_documents(self): return docs async def _find_documents_using_search_engine(self): - """Search the web for an ordinance document and construct it""" + """Search the web for ordinance docs using search engines""" docs = await download_jurisdiction_ordinance_using_search_engine( self.tech_specs.questions, self.jurisdiction, @@ -1050,7 +1057,7 @@ async def _find_documents_using_search_engine(self): return docs async def _find_documents_from_website(self): - """Search the website for ordinance documents""" + """Search the jurisdiction 
website for ordinance documents""" if self.jurisdiction_website and self.validate_user_website_input: await self._validate_jurisdiction_website() @@ -1080,7 +1087,7 @@ async def _find_documents_from_website(self): return docs async def _validate_jurisdiction_website(self): - """Validate user input for jurisdiction website""" + """Validate a user-supplied jurisdiction website URL""" if self.jurisdiction_website is None: return @@ -1111,7 +1118,7 @@ async def _validate_jurisdiction_website(self): self.jurisdiction_website = None async def _try_find_jurisdiction_website(self): - """Use web to try to find the main jurisdiction website""" + """Locate the primary jurisdiction website via search""" COMPASS_PB.update_jurisdiction_task( self.jurisdiction.full_name, description="Searching for jurisdiction website...", @@ -1130,7 +1137,7 @@ async def _try_find_jurisdiction_website(self): ) async def _try_elm_crawl(self): - """Try crawling website using ELM crawler""" + """Crawl the jurisdiction website using the ELM crawler""" self.jurisdiction_website = await get_redirected_url( self.jurisdiction_website, timeout=30 ) @@ -1162,7 +1169,7 @@ async def _try_elm_crawl(self): return docs, scrape_results async def _try_compass_crawl(self, scrape_results): - """Try to crawl the website with compass-style crawling""" + """Crawl the jurisdiction website using the COMPASS crawler""" checked_urls = set() for scrape_result in scrape_results: checked_urls.update({sub_res.url for sub_res in scrape_result}) @@ -1194,7 +1201,7 @@ async def _try_compass_crawl(self, scrape_results): ) async def _parse_docs_for_ordinances(self, docs): - """Parse docs (in order) for ordinances""" + """Parse candidate documents in order until ordinances found""" for possible_ord_doc in docs: doc = await self._try_extract_all_ordinances(possible_ord_doc) ord_count = num_ordinances_in_doc( @@ -1211,7 +1218,7 @@ async def _parse_docs_for_ordinances(self, docs): return None async def _try_extract_all_ordinances(self, possible_ord_doc): - """Try to extract ordinance values and permitted districts""" + """Extract both ordinance values and permitted-use districts""" with self._tracked_progress(): tasks = [ asyncio.create_task( @@ -1227,7 +1234,7 @@ async def _try_extract_all_ordinances(self, possible_ord_doc): @property def _extraction_task_kwargs(self): - """Keyword-argument pairs to pass to _try_extract_ordinances""" + """list: Dictionaries describing extraction task config""" return [ { "extractor_class": self.tech_specs.ordinance_text_extractor, @@ -1276,7 +1283,7 @@ async def _try_extract_ordinances( text_model, value_model, ): - """Try applying a single extractor to the relevant legal text""" + """Apply a single extractor and parser to legal text""" logger.debug( "Checking for ordinances in doc from %s", possible_ord_doc.attrs.get("source", "unknown source"), @@ -1304,7 +1311,7 @@ async def _try_extract_ordinances( return out async def _record_usage(self): - """Dump usage to file if tracker given""" + """Persist usage tracking data when a tracker is available""" if self.usage_tracker is None: return diff --git a/compass/services/base.py b/compass/services/base.py index b6c9e4cb7..5dda897a6 100644 --- a/compass/services/base.py +++ b/compass/services/base.py @@ -45,7 +45,7 @@ class Service(ABC): @classmethod def _queue(cls): - """Get queue for class.""" + """Return the service queue for the class""" service_name = cls.__name__ queue = get_service_queue(service_name) if queue is None: @@ -81,6 +81,8 @@ def name(self): async def 
process_using_futures(self, fut, *args, **kwargs): """Process a call to the service + The result is communicated by updating ``fut``. + Parameters ---------- fut : asyncio.Future @@ -109,12 +111,7 @@ def release_resources(self): # noqa: B027 @property @abstractmethod def can_process(self): - """Check if process function can be called. - - This should be a fast-running method that returns a boolean - indicating whether or not the service can accept more - processing calls. - """ + """bool: Flag indicating whether the service can accept work""" @abstractmethod async def process(self, *args, **kwargs): @@ -177,7 +174,7 @@ def name(self): return f"{self.__class__.__name__}-{self.model_name}{self.service_tag}" def _queue(self): - """Get queue for class""" + """Return the service queue for this instance""" queue = get_service_queue(self.name) if queue is None: msg = MISSING_SERVICE_MESSAGE.format(service_name=self.name) diff --git a/compass/services/cpu.py b/compass/services/cpu.py index 480be34b0..e23110cd3 100644 --- a/compass/services/cpu.py +++ b/compass/services/cpu.py @@ -47,26 +47,22 @@ def can_process(self): return True async def process(self, fn, pdf_bytes, **kwargs): - """Write URL doc to file asynchronously + """Execute a PDF parsing function in the process pool Parameters ---------- - doc : elm.web.document.BaseDocument - Document containing meta information about the file. Must - have a "source" key in the ``attrs`` dict containing the - URL, which will be converted to a file name using - :func:`elm.web.utilities.compute_fn_from_url`. - file_content : str or bytes - File content, typically string text for HTML files and bytes - for PDF file. - make_name_unique : bool, optional - Option to make file name unique by adding a UUID at the end - of the file name. By default, ``False``. + fn : callable + Callable executed inside the process pool. Receives + ``pdf_bytes`` as the first argument. + pdf_bytes : bytes + Raw PDF payload forwarded to ``fn``. + **kwargs + Additional keyword arguments passed to ``fn``. Returns ------- - Path - Path to output file. + Any + Result returned by ``fn`` after execution. """ loop = asyncio.get_running_loop() return await loop.run_in_executor( diff --git a/compass/services/threaded.py b/compass/services/threaded.py index c91951444..f80d8ff23 100644 --- a/compass/services/threaded.py +++ b/compass/services/threaded.py @@ -386,6 +386,11 @@ async def process(self, tracker): tracker : UsageTracker A usage tracker instance that contains usage info to be added to output file. + + Returns + ------- + dict + Updated usage dictionary persisted to ``usage_fp``. """ self._is_processing = True try: @@ -425,11 +430,9 @@ def can_process(self): async def process( self, jurisdiction, doc, seconds_elapsed, usage_tracker=None ): - """Add usage from tracker to file + """Record jurisdiction metadata in the tracking file - Any existing usage info in the file will remain unchanged - EXCEPT for anything under the label of the input `tracker`, - all of which will be replaced with info from the tracker itself. + The file on disk is updated in-place. Parameters ---------- @@ -482,6 +485,13 @@ async def process(self, html_fp, **kwargs): **kwargs Additional keyword-value argument pairs to pass to :class:`elm.web.document.HTMLDocument`. + + Returns + ------- + tuple + Two-item tuple of the loaded + :class:`~elm.web.document.HTMLDocument` + and the raw HTML string content. 
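+
+        Examples
+        --------
+        A minimal sketch using the module-level convenience wrapper,
+        assuming the HTML file-loader service is running and
+        ``page.html`` is a hypothetical local file::
+
+            doc, raw_html = await read_html_file("page.html")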
""" loop = asyncio.get_running_loop() return await loop.run_in_executor( @@ -612,7 +622,9 @@ async def read_html_file(html_fp, **kwargs): Returns ------- - elm.web.document.HTMLDocument - HTMLDocument instance with text loaded into page. + tuple + Two-item tuple of the loaded + :class:`~elm.web.document.HTMLDocument` + and the raw HTML string content. """ return await HTMLFileLoader.call(html_fp, **kwargs) diff --git a/compass/services/usage.py b/compass/services/usage.py index 0df00c332..3613fabb5 100644 --- a/compass/services/usage.py +++ b/compass/services/usage.py @@ -135,14 +135,7 @@ def add_to(self, other): @property def totals(self): - """Compute total usage across all sub-labels - - Returns - ------- - dict - Dictionary containing usage information totaled across all - sub-labels. - """ + """dict: Aggregated usage totals across all sub-labels""" totals = {} for model, model_usage in self.items(): total_model_usage = totals[model] = {} diff --git a/compass/utilities/base.py b/compass/utilities/base.py index 949c18a4f..4580b062b 100644 --- a/compass/utilities/base.py +++ b/compass/utilities/base.py @@ -8,24 +8,41 @@ def title_preserving_caps(string): - """Convert string to title case, preserving existing capitalization + """Convert text to title case while keeping intentional capitals Parameters ---------- string : str - Input string potentially containing capitalized words. + Input text that may already contain capitalized acronyms or + proper nouns. Returns ------- str - String converted to title case, preserving existing - capitalization. + Title-cased string in which words containing existing uppercase + characters retain their capitalization. + + Examples + -------- + >>> title_preserving_caps("NREL solar ordinance") + 'NREL Solar Ordinance' """ return " ".join(map(_cap, string.split(" "))) class WebSearchParams: - """Helper class to store web search params""" + """Capture configuration for jurisdiction web searches + + The class normalizes and stores search-related settings that are + reused across multiple search operations, including browser + concurrency, engine preferences, and filtering rules. + + Notes + ----- + Instances lazily translate the provided search engine definitions + into ELM-compatible keyword arguments via :attr:`se_kwargs`, + enabling straightforward reuse when issuing queries. + """ def __init__( self, @@ -124,7 +141,19 @@ def se_kwargs(self): class Directories: - """Helper class to store directories used in COMPASS run""" + """Encapsulate filesystem locations used by a COMPASS run + + The helper centralizes directory computations so downstream code + can rely on fully resolved :class:`pathlib.Path` instances for + logging, cleaned text, downloaded ordinances, and intermediate + databases. + + Notes + ----- + All provided paths are expanded to absolute form when the class is + instantiated, guaranteeing consistent behavior across relative and + user-expanded paths. 
+ """ def __init__( self, @@ -175,7 +204,6 @@ def __init__( ) def __iter__(self): - """Iterate over all directories""" yield self.out yield self.logs yield self.clean_files @@ -183,16 +211,16 @@ def __iter__(self): yield self.jurisdiction_dbs def make_dirs(self): - """Create all directories if they do not exist""" + """Create the managed directories if they do not exist""" for folder in self: folder.mkdir(exist_ok=True, parents=True) def _cap(word): - """Capitalize first letter of the word""" + """Capitalize the first character of ``word``; preserve the rest""" return "".join([word[0].upper(), word[1:]]) def _full_path(in_path): - """Expand and resolve input path""" + """Resolve an input path to an absolute :class:`pathlib.Path`""" return Path(in_path).expanduser().resolve() diff --git a/compass/utilities/enums.py b/compass/utilities/enums.py index cf0864089..fa2e092b4 100644 --- a/compass/utilities/enums.py +++ b/compass/utilities/enums.py @@ -4,7 +4,22 @@ class LLMUsageCategory(StrEnum): - """COMPASS LLM usage categories""" + """Enumerate semantic buckets for tracking LLM usage + + The values in this enumeration provide consistent labels when + recording usage metrics, billing data, and telemetry associated + with LLM calls originating from COMPASS pipelines. Each category + maps to a specific functional concern (e.g., ordinance value + extraction, jurisdiction validation) allowing downstream analytics + to aggregate usage meaningfully. + + Notes + ----- + Values intentionally mirror the task names used when instantiating + :class:`~compass.llm.calling.BaseLLMCaller` implementations so that + the enumerations can be converted to strings without additional + mapping logic. + """ CHAT = auto() """Usage related to general LLM chat calls""" @@ -35,7 +50,20 @@ class LLMUsageCategory(StrEnum): class LLMTasks(StrEnum): - """LLM-based COMPASS tasks""" + """Human-friendly task identifiers for LLM workflows + + This enumeration exposes the set of user-facing task names that map + onto :class:`LLMUsageCategory` entries. Pipeline components use + these values for configuration (e.g., selecting prompt templates) + while the paired usage categories ensure consistent metrics + tracking. + + Notes + ----- + When a task is defined as a direct alias of an + :class:`LLMUsageCategory`, it inherits the corresponding usage label + so downstream monitoring does not require additional translation. + """ DATE_EXTRACTION = LLMUsageCategory.DATE_EXTRACTION """Date extraction task""" diff --git a/compass/utilities/finalize.py b/compass/utilities/finalize.py index 58f6eaac3..be1a457b8 100644 --- a/compass/utilities/finalize.py +++ b/compass/utilities/finalize.py @@ -52,32 +52,43 @@ def save_run_meta( total_cost, models, ): - """Write out meta information about ordinance collection run + """Persist metadata describing an ordinance collection run Parameters ---------- - dirs : :class:`~compass.utilities.base.Directories` - Directories instance containing information about the output - directories used for the run. + dirs : compass.utilities.base.Directories + Directory container describing where outputs, logs, and working + files should be written during the run. tech : {"wind", "solar", "small wind"} - Technology that was the target of the run. - start_date, end_date : datetime.datetime - Instances representing the start and end dates, respectively. - num_jurisdictions_searched, num_jurisdictions_found : int - Total number of jurisdictions that were searched and actually - found, respectively. 
+ Technology targeted by the collection run. The value is stored + verbatim in the metadata file for downstream reporting. + start_date : datetime.datetime + Timestamp marking when the run began. + end_date : datetime.datetime + Timestamp marking when the run finished. + num_jurisdictions_searched : int + Number of jurisdictions evaluated during the run. + num_jurisdictions_found : int + Number of jurisdictions that produced at least one ordinance. total_cost : float - Total cost of the processing, in $. + Aggregate cost incurred by LLM usage for the run. ``None`` or + zero values are recorded as ``null`` in the metadata. models : dict - Dictionary mapping task names (from - :class:`~compass.utilities.enums.LLMTasks`) to - :class:`~compass.llm.config.OpenAIConfig` instances used for the - run. + Mapping from LLM task identifiers (as str) to configuration + objects (:class:`~compass.llm.config.OpenAIConfig`) used + throughout the run. The function records a condensed summary of + each configuration. Returns ------- - run_time : float - Total processing run-time, in seconds. + float + Total runtime of the collection, expressed in seconds. + + Notes + ----- + The function writes ``meta.json`` into ``dirs.out`` alongside + references to other artifacts generated during the run. The return + value mirrors the ``total_time`` entry stored in the metadata. """ try: @@ -124,32 +135,32 @@ def save_run_meta( def doc_infos_to_db(doc_infos): - """Convert list of docs to output database + """Aggregate parsed ordinance CSV files into a normalized database Parameters ---------- - doc_infos : iterable of dict - Iterable of dictionaries, where each dictionary has at least the - following keys: - - - "ord_db_fp": Path to parsed ordinance CSV file - - "source": URL of the file from which ordinances were - extracted - - "date": Tuple of (year, month, day). Any of the values can - be ``None``. - - "jurisdiction": Instance of Jurisdiction representing the - jurisdiction associated with these ordinance values. - - If this iterable is empty, and empty DataFrame (with the correct - columns) is returned. + doc_infos : Iterable + Iterable of dictionaries describing ordinance extraction + results. Each dictionary must contain ``"ord_db_fp"`` (path to a + parsed CSV), ``"source"`` (document URL), ``"date"`` (tuple of + year, month, day, with ``None`` allowed), and ``"jurisdiction"`` + (a :class:`~compass.utilities.location.Jurisdiction` instance). Returns ------- - ordinances : pandas.DataFrame - DataFrame containing ordinances collected from all individual - CSV's. - count : int - Total number jurisdictions for which ordinances were found. + pandas.DataFrame + Consolidated ordinance dataset containing the columns enumerated + in :data:`_PARSED_COLS`. + int + Number of jurisdictions contributing at least one ordinance to + the consolidated dataset. + + Notes + ----- + Empty or ``None`` entries in ``doc_infos`` are skipped. Ordinance + CSVs that lack parsed values (``num_ordinances_dataframe`` equals + zero) are ignored. The returned DataFrame enforces an ordered column + layout and casts the ``quantitative`` flag to nullable boolean. """ db = [] for doc_info in doc_infos: @@ -180,19 +191,24 @@ def doc_infos_to_db(doc_infos): def save_db(db, out_dir): - """Split DB into qualitative vs quantitative and save to disk + """Write qualitative and quantitative ordinance outputs to disk Parameters ---------- db : pandas.DataFrame - Pandas DataFrame containing ordinance data to save. 
Must have - all columns in :obj:`QUANT_OUT_COLS` and :obj:`QUAL_OUT_COLS` - as well as a ``"quantitative"`` column that contains a boolean - determining whether the rwo belongs in the quantitative output - file (``True``) or the qualitative output file (``False``). + Ordinance dataset containing the full set of columns listed in + :data:`QUANT_OUT_COLS` and :data:`QUAL_OUT_COLS`, plus the + ``quantitative`` boolean flag that dictates output routing. out_dir : path-like - Path to output directory where ordinance database csv files - should be written. + Directory where ``qualitative_ordinances.csv`` and + ``quantitative_ordinances.csv`` should be written. The directory + is created by :class:`pathlib.Path` if necessary. + + Notes + ----- + Empty DataFrames short-circuit without creating output files. The + function respects the boolean ``quantitative`` column and assumes it + has already been sanitized by :func:`doc_infos_to_db`. """ if db.empty: return @@ -272,23 +288,31 @@ def _extract_model_info_from_all_models(models): def compile_run_summary_message( total_seconds, total_cost, out_dir, document_count ): - """Summarize the run results into a formatted string + """Create a human-readable summary of a completed run Parameters ---------- - total_seconds : int or float - Total number of seconds the run took to complete. - total_cost : int or float - Total cost of the run, in $. + total_seconds : float or int + Duration of the run in seconds. + total_cost : float or int or None + Monetary cost incurred by the run. ``None`` or zero suppresses + the cost line in the summary. out_dir : path-like - Path to output directory where the run results are saved. + Location of the run output directory. The value is embedded in + the summary text. document_count : int - Number of documents found during the run. + Number of documents discovered across all jurisdictions. Returns ------- str - Formatted string summarizing the run results. + Summary string formatted for CLI presentation with ``rich`` + markup. + + Notes + ----- + The function does not perform I/O; callers may log or display the + returned string as needed. """ runtime = _elapsed_time_as_str(total_seconds) total_cost = ( diff --git a/compass/utilities/io.py b/compass/utilities/io.py index c099e7956..02f3e9c50 100644 --- a/compass/utilities/io.py +++ b/compass/utilities/io.py @@ -10,23 +10,35 @@ async def load_local_docs(fps, **kwargs): - """Load a document for each input filepath + """Load local documents into `elm` document instances Parameters ---------- - fps : iterable of path-like - Iterable of paths representing documents to load. - kwargs - Keyword-argument pairs to initialize - :class:`elm.web.file_loader.AsyncLocalFileLoader`. + fps : Iterable + Iterable of paths referencing local files to load. + **kwargs + Additional keyword arguments forwarded to + :class:`elm.web.file_loader.AsyncLocalFileLoader` for + configuration such as ``loader``, caching, or parsing options. Returns ------- - list - List of non-empty document instances containing information from - the local documents. If a file could not be loaded (i.e. - document instance is empty), it will not be included in the - output list. + list of elm.web.document.BaseDocument + Non-empty loaded documents corresponding to the supplied + filepaths. Empty results (e.g., unreadable files) are filtered + out of the returned list. 
+ + Raises + ------ + elm.exceptions.ELMError + Propagated when the underlying loader fails to read one of the + provided files and is configured to raise on errors. + + Notes + ----- + Detailed debug information about loaded page counts is emitted via + the ``compass.utilities.io`` logger at ``TRACE`` level to assist + with troubleshooting ingestion runs. """ logger.trace("Loading docs for the following paths:\n%r", fps) logger.trace( diff --git a/compass/utilities/jurisdictions.py b/compass/utilities/jurisdictions.py index 0dfbe0682..774edcd9d 100644 --- a/compass/utilities/jurisdictions.py +++ b/compass/utilities/jurisdictions.py @@ -18,19 +18,24 @@ def load_all_jurisdiction_info(): - """Load DataFrame containing info for all jurisdictions + """Load canonical jurisdiction metadata for the continental US Returns ------- pandas.DataFrame - DataFrame containing info like names, FIPS, websites, etc. for - all jurisdictions. + Table containing jurisdiction names, FIPS codes, official + websites, and related attributes. + + Notes + ----- + Missing values are normalized to ``None`` to simplify downstream + serialization. """ return pd.read_csv(_COUNTY_DATA_FP).replace({np.nan: None}) def jurisdiction_websites(jurisdiction_info=None): - """Load mapping of jurisdiction name and state to website + """Build a mapping of jurisdiction identifiers to website URLs Parameters ---------- @@ -43,8 +48,13 @@ def jurisdiction_websites(jurisdiction_info=None): Returns ------- dict - Dictionary where keys are FIPS codes and values are the relevant - website URL. + Mapping from jurisdiction FIPS codes to their primary website + URLs. + + Notes + ----- + The helper uses FIPS codes rather than string names to avoid + collisions between same-named jurisdictions in different states. """ if jurisdiction_info is None: jurisdiction_info = load_all_jurisdiction_info() @@ -55,7 +65,10 @@ def jurisdiction_websites(jurisdiction_info=None): def load_jurisdictions_from_fp(jurisdiction_fp): - """Load jurisdiction info based on jurisdictions in the input fp + """Load jurisdiction metadata for entries listed in a CSV file + + This loader trims whitespace, deduplicates request rows, and filters + out jurisdictions not present in the canonical data set. Parameters ---------- @@ -66,9 +79,18 @@ def load_jurisdictions_from_fp(jurisdiction_fp): Returns ------- pandas.DataFrame - DataFrame containing jurisdiction info like names, FIPS, - websites, etc. for all requested jurisdictions (that were - found). + Jurisdiction information, including FIPS codes and websites, + for every matching entry in the lookup table. + + Raises + ------ + COMPASSValueError + If the input file is missing required columns (``State`` or + ``Jurisdiction Type`` when subdivisions are provided). + + Notes + ----- + Missing jurisdictions trigger warnings with a tabular summary. """ jurisdictions = pd.read_csv(jurisdiction_fp).replace({np.nan: None}) jurisdictions = _validate_jurisdiction_input(jurisdictions) diff --git a/compass/utilities/location.py b/compass/utilities/location.py index b6bc3da94..6ef8ece35 100644 --- a/compass/utilities/location.py +++ b/compass/utilities/location.py @@ -14,7 +14,21 @@ class Jurisdiction: - """Class representing a jurisdiction""" + """Model a geographic jurisdiction used throughout COMPASS + + The class normalizes casing for location components and provides + convenience properties for rendering jurisdiction names with + correct prefixes. 
It is designed to align with ordinance validation + logic that expects consistent casing and phrasing across states, + counties, and municipal subdivisions. + + Notes + ----- + Instances compare case-insensitively for type and state, while the + county and subdivision name comparisons preserve their stored + casing. Hashing and ``str`` conversions defer to the full display + name generated by :attr:`full_name`. + """ def __init__( self, @@ -65,7 +79,7 @@ def __init__( @cached_property def full_name(self): - """str: Full jurisdiction name""" + """str: Comma-separated jurisdiction display name""" name_parts = [ self.full_subdivision_phrase, self.full_county_phrase, @@ -76,7 +90,7 @@ def full_name(self): @cached_property def full_name_the_prefixed(self): - """str: Full jurisdiction name with `the` prefix if needed""" + """str: Full location name prefixed with ``the`` as needed""" if self.type.casefold() == "state": return f"the state of {self.state}" @@ -87,7 +101,7 @@ def full_name_the_prefixed(self): @cached_property def full_subdivision_phrase(self): - """str: Full jurisdiction subdivision phrase, or empty str""" + """str: Subdivision phrase for the jurisdiction or empty str""" if not self.subdivision_name: return "" @@ -98,7 +112,7 @@ def full_subdivision_phrase(self): @cached_property def full_subdivision_phrase_the_prefixed(self): - """str: Full jurisdiction subdivision phrase, or empty str""" + """str: Subdivision phrase prefixed with ``the`` as needed""" if self.type.casefold() in JURISDICTION_TYPES_AS_PREFIXES: return f"the {self.full_subdivision_phrase}" @@ -106,7 +120,7 @@ def full_subdivision_phrase_the_prefixed(self): @cached_property def full_county_phrase(self): - """str: Full jurisdiction county phrase, or empty str""" + """str: County phrase for the jurisdiction or empty str""" if not self.county: return "" diff --git a/compass/utilities/nt.py b/compass/utilities/nt.py index d937aad32..d27e8f7c0 100644 --- a/compass/utilities/nt.py +++ b/compass/utilities/nt.py @@ -15,6 +15,32 @@ ], defaults=[None, None, None, None, 25], ) +ProcessKwargs.__doc__ = """Execution options passed to `compass process` + +Parameters +---------- +known_local_docs : list of path-like, optional + Local ordinance files to seed the run. ``None`` disables the seed. + By default, ``None``. +known_doc_urls : list of str, optional + Known ordinance URLs to prioritize during retrieval. + By default, ``None``. +file_loader_kwargs : dict, optional + Keyword arguments forwarded to the document loader implementation. + By default, ``None``. +td_kwargs : dict, optional + Additional configuration for top-level document discovery logic. + By default, ``None``. +tpe_kwargs : dict, optional + Parameters controlling text parsing and extraction. + By default, ``None``. +ppe_kwargs : dict, optional + Parameters controlling permitted-use parsing and extraction. + By default, ``None``. +max_num_concurrent_jurisdictions : int, default=25 + Maximum number of jurisdictions processed simultaneously. + By default, ``25``. +""" TechSpec = namedtuple( "TechSpec", @@ -31,3 +57,28 @@ "website_url_keyword_points", ], ) +TechSpec.__doc__ = """Bundle extraction configuration for a technology + +Parameters +---------- +name : str + Display name for the technology (e.g., ``"solar"``). +questions : dict + Prompt templates or question sets used during extraction. +heuristic : callable + Function implementing heuristic filters prior to LLM invocation. 
+ordinance_text_collector : callable + Callable that gathers candidate ordinance text spans. +ordinance_text_extractor : callable + Callable that extracts relevant ordinance snippets. +permitted_use_text_collector : callable + Callable that gathers candidate permitted-use text spans. +permitted_use_text_extractor : callable + Callable that extracts permitted-use content. +structured_ordinance_parser : callable + Callable that transforms ordinance text into structured values. +structured_permitted_use_parser : callable + Callable that transforms permitted-use text into structured values. +website_url_keyword_points : dict or None + Weightings for scoring website URLs during search. +""" diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index 1bb4284a0..aac176754 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -1,4 +1,4 @@ -"""COMPASS Ordinances parsing utilities.""" +"""COMPASS ordinance parsing utilities""" import json import logging @@ -19,32 +19,38 @@ def clean_backticks_from_llm_response(content): Parameters ---------- content : str - LLM response that may or may not contain markdown-style triple - backticks. + LLM response that may contain markdown triple backticks. Returns ------- str - LLM response stripped of the markdown-style backticks + Response stripped of all leading and trailing backtick markers. """ content = content.lstrip().rstrip() return content.removeprefix("```").lstrip("\n").removesuffix("```") def llm_response_as_json(content): - """LLM response to JSON + """Parse a raw LLM response into JSON-compatible data Parameters ---------- content : str - LLM response that contains a string representation of - a JSON file. + Response text expected to contain a JSON object, possibly with + Markdown fences or Python boolean literals. Returns ------- dict - Response parsed into dictionary. This dictionary will be empty - if the response cannot be parsed by JSON. + Parsed JSON structure. When parsing fails, the function returns + an empty dictionary. + + Notes + ----- + The parser strips Markdown code fences, coerces Python-style + booleans to lowercase JSON literals, and logs the raw response on + decode failure. The logging includes guidance for increasing token + limits or updating prompts. """ content = clean_backticks_from_llm_response(content) content = content.removeprefix("json").lstrip("\n") @@ -65,7 +71,12 @@ def llm_response_as_json(content): def merge_overlapping_texts(text_chunks, n=300): - """Merge chunks of text by removing any overlap. + """Merge text chunks while trimming overlapping boundaries + + Overlap detection compares at most ``n`` characters at each + boundary but never more than half the length of the accumulated + output. Chunks that do not overlap are concatenated with a newline + separator. Parameters ---------- @@ -81,7 +92,7 @@ def merge_overlapping_texts(text_chunks, n=300): Returns ------- str - Merged text. + Merged text assembled from the non-overlapping portions. """ text_chunks = list(filter(None, text_chunks)) if not text_chunks: @@ -103,7 +114,7 @@ def merge_overlapping_texts(text_chunks, n=300): def extract_ord_year_from_doc_attrs(doc_attrs): - """Extract year corresponding to the ordinance from doc instance + """Extract the ordinance year stored in document attributes Parameters ---------- @@ -117,15 +128,21 @@ def extract_ord_year_from_doc_attrs(doc_attrs): Returns ------- int or None - Parsed year for ordinance (int) or ``None`` if it wasn't found - in the document's attrs. 
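# --- Editor's illustrative sketch (not part of the patch) -------------------
# Demonstrates the fence stripping and boolean coercion described in the
# ``llm_response_as_json`` docstring above; the sample responses are made up.
from compass.utilities.parsing import llm_response_as_json

fenced = '```json\n{"contains_ord_info": True, "year": 2023}\n```'
print(llm_response_as_json(fenced))
# -> {'contains_ord_info': True, 'year': 2023} (fences and booleans handled)
print(llm_response_as_json("not JSON at all"))
# -> {} (parse failures return an empty dict)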
+        Parsed ordinance year or ``None`` when unavailable or invalid.
+
+    Examples
+    --------
+    >>> extract_ord_year_from_doc_attrs({"date": (2024, 5, 17)})
+    2024
+    >>> extract_ord_year_from_doc_attrs({"date": (None, None, None)}) is None
+    True
     """
     year = doc_attrs.get("date", (None, None, None))[0]
     return year if year is not None and year > 0 else None
 
 
 def num_ordinances_in_doc(doc, exclude_features=None):
-    """Count number of ordinances found in document
+    """Count the number of ordinance entries on a document
 
     Parameters
     ----------
@@ -139,7 +156,7 @@ def num_ordinances_in_doc(doc, exclude_features=None):
     Returns
     -------
     int
-        Number of unique ordinance values extracted from this document.
+        Number of ordinance rows represented in ``doc``.
     """
     if doc is None or doc.attrs.get("ordinance_values") is None:
         return 0
@@ -150,7 +167,7 @@ def num_ordinances_in_doc(doc, exclude_features=None):
 
 
 def num_ordinances_dataframe(data, exclude_features=None):
-    """Count number of ordinances found in DataFrame
+    """Count ordinance rows contained in a DataFrame
 
     Parameters
     ----------
@@ -164,7 +181,13 @@ def num_ordinances_dataframe(data, exclude_features=None):
     Returns
     -------
     int
-        Number of unique ordinance values extracted from this DataFrame.
+        Count of rows meeting the ordinance criteria.
+
+    Raises
+    ------
+    KeyError
+        If the input DataFrame lacks the ``feature`` column when
+        ``exclude_features`` is provided.
     """
     if exclude_features:
         mask = ~data["feature"].str.casefold().isin(exclude_features)
@@ -174,7 +197,7 @@ def num_ordinances_dataframe(data, exclude_features=None):
 
 
 def ordinances_bool_index(data):
-    """Array of bools indicating rows containing ordinances in DataFrame
+    """Compute a boolean mask indicating ordinance rows
 
     Parameters
     ----------
@@ -184,9 +207,8 @@ def ordinances_bool_index(data):
 
     Returns
     -------
-    array-like
-        Array of bools indicating rows containing ordinances in
-        DataFrame.
+    numpy.ndarray
+        Boolean mask identifying rows that contain ordinance values.
     """
     if data is None or data.empty:
         return np.array([], dtype=bool)
@@ -200,7 +222,7 @@ def ordinances_bool_index(data):
 
 
 def load_config(config_fp):
-    """Load a JSON or JSON5 config file
+    """Load configuration data from JSON or JSON5 sources
 
     Parameters
     ----------
@@ -210,12 +232,19 @@ def load_config(config_fp):
     Returns
     -------
     dict
-        Dictionary containing the config file contents.
+        Parsed configuration object.
 
     Raises
     ------
     COMPASSValueError
-        If the config file does not end with `.json` or `.json5`.
+        If the config file extension is not ``.json`` or
+        ``.json5``.
+
+    Notes
+    -----
+    JSON5 loading is handled via :mod:`pyjson5`, enabling comments and
+    trailing commas, among other quality-of-life improvements over
+    standard JSON, which uses the built-in :func:`json.load`.
     """
     config_fp = Path(config_fp)
 
diff --git a/compass/validation/content.py b/compass/validation/content.py
index 8e41cbe33..1b084cd9d 100644
--- a/compass/validation/content.py
+++ b/compass/validation/content.py
@@ -18,13 +18,12 @@
 
 
 class ParseChunksWithMemory:
-    """Check text chunks by sometimes looking at previous chunks
+    """Iterate through text chunks while caching prior LLM decisions
 
-    The idea behind this approach is that sometimes the context for a
-    setback or other ordinances is found in a previous chunk, so it may
-    be worthwhile (especially for validation purposes) to check a few
-    text chunks back for some validation pieces. In order to do this
-    semi-efficiently, we make use of a cache that's labeled "memory".
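# --- Editor's illustrative sketch (not part of the patch) -------------------
# Loading a run configuration as described in the ``load_config`` docstring
# above; "config.json5" is a hypothetical path, and JSON5 files may contain
# comments and trailing commas.
from compass.utilities.parsing import load_config

config = load_config("config.json5")
print(sorted(config))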
+ This helper stores an in-memory cache of prior validation results so + each chunk can optionally reuse outcomes from earlier LLM calls. The + design supports revisiting a configurable number of preceding text + chunks when newer chunks lack sufficient context. """ def __init__(self, text_chunks, num_to_recall=2): @@ -61,38 +60,30 @@ def _inverted_text(self, starting_ind): yield from inverted_text[:self.num_to_recall] async def parse_from_ind(self, ind, key, llm_call_callback): - """Validate a chunk of text + """Validate a chunk by consulting current and prior context - Validation occurs by querying the LLM using the input prompt and - parsing the `key` from the response JSON. The prompt should - request that the key be a boolean output. If the key retrieved - from the LLM response is False, a number of previous text chunks - are checked as well, using the same prompt. This can be helpful - in cases where the answer to the validation prompt (e.g. does - this text pertain to a large WECS?) is only found in a previous - text chunk. + Cached verdicts are reused to avoid redundant LLM calls when + neighboring chunks have already been assessed. If the cache + lacks a verdict, the callback is executed and the result stored. Parameters ---------- ind : int - Positive integer corresponding to the chunk index. - Must be less than `len(text_chunks)`. + Index of the chunk to inspect. Must be less than the number + of available chunks. key : str - A key expected in the JSON output of the LLM containing the - response for the validation question. This string will also - be used to format the system prompt before it is passed to - the LLM. + JSON key expected in the LLM response. The same key is used + to populate the decision cache. llm_call_callback : callable - Callable that takes a `key` and `text_chunk` as inputs and - returns a boolean indicating whether or not the text chunk - passes the validation check. + Awaitable invoked with ``(key, text_chunk)`` that returns a + boolean indicating whether the chunk satisfies the LLM + validation check. Returns ------- bool - ``True`` if the LLM returned ``True`` for this text chunk or - `num_to_recall-1` text chunks before it. - ``False`` otherwise. + ``True`` if the selected or recalled chunk satisfies the + check, ``False`` otherwise. """ logger.debug("Checking %r for ind %d", key, ind) mem_text = zip( @@ -316,7 +307,7 @@ async def parse_by_chunks( callbacks=None, min_chunks_to_process=3, ): - """Parse text by chunks, passing to callbacks if it's legal text + """Stream text chunks through heuristic and legal validators This method goes through the chunks one by one, and passes them to the callback parsers if the `legal_text_validator` check passes. If @@ -346,6 +337,13 @@ async def parse_by_chunks( min_chunks_to_process : int, optional Minimum number of chunks to process before aborting due to text not being legal. By default, ``3``. + + Notes + ----- + This coroutine only orchestrates validation. Callbacks are + responsible for persisting any extracted results. Callback futures + are awaited concurrently and share the same task name as the caller + to simplify tracing within structured logging. 
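# --- Editor's illustrative sketch (not part of the patch) -------------------
# Exercises the chunk-memory workflow documented above with a stub in place
# of a real LLM callback; the chunk text is made up, and the expectation that
# the stub's verdict is returned unchanged is an assumption.
import asyncio

from compass.validation.content import ParseChunksWithMemory


async def _stub_llm_check(key, text_chunk):
    # Pretend the LLM answers "yes" whenever the word "setback" appears
    return "setback" in text_chunk.casefold()


async def _demo():
    chunks = [
        "Article 1: purpose and definitions.",
        "Wind energy systems: setback of 500 ft from dwellings.",
    ]
    parser = ParseChunksWithMemory(chunks, num_to_recall=2)
    verdict = await parser.parse_from_ind(1, "wind_setback", _stub_llm_check)
    print(verdict)  # expected to print True for this stub callback


asyncio.run(_demo())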
""" passed_heuristic_mem = [] callbacks = callbacks or [] diff --git a/compass/validation/graphs.py b/compass/validation/graphs.py index 31c2287d9..1bb9a1984 100644 --- a/compass/validation/graphs.py +++ b/compass/validation/graphs.py @@ -8,18 +8,28 @@ def setup_graph_correct_document_type(**kwargs): - """Setup graph to check for correct document type in legal text + """Build a decision tree for validating ordinance document types Parameters ---------- **kwargs - Keyword-value pairs to add to graph. + Additional keyword arguments forwarded to + :func:`compass.common.setup_graph_no_nodes`. The helper consumes + ``doc_is_from_ocr`` (default ``False``) to alter draft-detection + prompts for scanned documents. Returns ------- networkx.DiGraph - Graph instance that can be used to initialize an - `elm.tree.DecisionTree`. + Graph suitable for constructing an ``elm.tree.DecisionTree`` + that distinguishes legally binding ordinances from draft, + planning, meeting, and similar documents. + + Notes + ----- + The resulting graph encodes a structured sequence of Yes/No prompts + that culminate in a JSON response containing summary metadata and a + legal-text boolean keyed by ``{key}``. """ doc_is_from_ocr = kwargs.pop("doc_is_from_ocr", False) @@ -277,20 +287,29 @@ def setup_graph_correct_document_type(**kwargs): def setup_graph_correct_jurisdiction_type(jurisdiction, **kwargs): - """Setup graph to check for correct jurisdiction type in legal text + """Build a decision tree for jurisdiction-type validation Parameters ---------- - jurisdiction : Jurisdiction - Jurisdiction for which validation is being performed. + jurisdiction : compass.utilities.location.Jurisdiction + Target jurisdiction descriptor that guides prompt wording. **kwargs - Keyword-value pairs to add to graph. + Additional keyword arguments forwarded to + :func:`compass.common.setup_graph_no_nodes` (for example, + ``usage_tracker`` or ``llm_service`` identifiers). Returns ------- networkx.DiGraph - Graph instance that can be used to initialize an - `elm.tree.DecisionTree`. + Graph capturing the sequence of questions needed to verify + whether ordinance text names the expected jurisdiction type and + geography. + + Notes + ----- + The prompts collected through this graph expect the LLM to return a + JSON payload keyed by ``correct_jurisdiction`` plus a human-readable + explanation summarizing the reasoning. """ G = setup_graph_no_nodes( # noqa: N806 d_tree_name="Correct jurisdiction type", **kwargs @@ -497,20 +516,29 @@ def setup_graph_correct_jurisdiction_type(jurisdiction, **kwargs): def setup_graph_correct_jurisdiction_from_url(jurisdiction, **kwargs): - """Setup graph to check for correct jurisdiction in URL + """Build a decision tree for validating jurisdictions from URLs Parameters ---------- - jurisdiction : Jurisdiction - Jurisdiction for which validation is being performed. + jurisdiction : compass.utilities.location.Jurisdiction + Jurisdiction descriptor supplying state, county, and subdivision + phrases used in prompts. **kwargs - Keyword-value pairs to add to graph. + Additional keyword arguments forwarded to + :func:`compass.common.setup_graph_no_nodes`. Returns ------- networkx.DiGraph - Graph instance that can be used to initialize an - `elm.tree.DecisionTree`. + Graph that queries whether a URL explicitly references the + jurisdiction's state, county, and subdivision names and returns + a JSON verdict. + + Notes + ----- + The graph aggregates boolean keys such as ``correct_state`` and + ``correct_county``. 
The final prompt instructs the LLM to emit a + JSON document describing each match plus an explanatory string. """ G = setup_graph_no_nodes( # noqa: N806 d_tree_name="Correct jurisdiction type from URL", **kwargs diff --git a/compass/validation/location.py b/compass/validation/location.py index 0a54682dd..6d07d7a7a 100644 --- a/compass/validation/location.py +++ b/compass/validation/location.py @@ -22,7 +22,7 @@ class DTreeURLJurisdictionValidator(BaseLLMCaller): - """Validator that checks whether a URL matches a jurisdiction""" + """Validate whether a URL appears to target a jurisdiction""" SYSTEM_MESSAGE = ( "You are an expert data analyst that examines URLs to determine if " @@ -36,28 +36,49 @@ def __init__(self, jurisdiction, **kwargs): Parameters ---------- - structured_llm_caller : StructuredLLMCaller - Instance used for structured validation queries. + jurisdiction : compass.utilities.location.Jurisdiction + Jurisdiction descriptor with the target location attributes. **kwargs - Additional keyword arguments to pass to the - :class:`~compass.llm.calling.BaseLLMCaller` instance. + Additional keyword arguments forwarded to + :class:`~compass.llm.calling.BaseLLMCaller` for model + selection, temperature, or tracing control. + + Notes + ----- + The validator stores the input jurisdiction for subsequent URL + checks; it does not perform any validation work during + instantiation. """ super().__init__(**kwargs) self.jurisdiction = jurisdiction async def check(self, url): - """Check if the content passes the validation + """Determine whether the supplied URL targets the jurisdiction Parameters ---------- - content : str - Document content to validate. + url : str + URL string to evaluate. Empty values short-circuit to + ``False``. Returns ------- bool - ``True`` if the content passes the validation check, - ``False`` otherwise. + ``True`` when the decision-tree evaluation finds all + jurisdiction criteria satisfied, ``False`` otherwise. + + Raises + ------ + compass.exceptions.COMPASSError + Propagated if underlying LLM interactions fail while the + caller has configured :class:`BaseLLMCaller` to raise. + + Notes + ----- + The method delegates to an internal asynchronous decision tree + backed by :class:`ChatLLMCaller`. The validator aggregates + structured responses and only approves when each required + attribute matches the target jurisdiction. """ if not url: return False @@ -87,7 +108,7 @@ def _parse_output(self, props): # noqa: PLR6301 class DTreeJurisdictionValidator(BaseLLMCaller): - """Jurisdiction Validation using a decision tree""" + """Validate ordinance text against a target jurisdiction""" META_SCORE_KEY = "Jurisdiction Validation Score" """Key in doc.attrs where score is stored""" @@ -103,28 +124,42 @@ def __init__(self, jurisdiction, **kwargs): Parameters ---------- - structured_llm_caller : StructuredLLMCaller - Instance used for structured validation queries. + jurisdiction : compass.utilities.location.Jurisdiction + Jurisdiction descriptor identifying expected applicability. **kwargs - Additional keyword arguments to pass to the - :class:`~compass.llm.calling.BaseLLMCaller` instance. + Additional keyword arguments forwarded to + :class:`~compass.llm.calling.BaseLLMCaller` for configuring + LLM temperature, timeout, or similar options. 
""" super().__init__(**kwargs) self.jurisdiction = jurisdiction async def check(self, content): - """Check if the content passes the validation + """Determine whether ordinance text matches the jurisdiction + + The decision tree checks jurisdiction type, state, and + subdivision alignment. Parameters ---------- content : str - Document content to validate. + Plain-text ordinance content extracted from a document. Returns ------- bool - ``True`` if the content passes the validation check, - ``False`` otherwise. + ``True`` when the decision tree concludes the ordinance is + scoped to the configured jurisdiction, ``False`` otherwise. + + Raises + ------ + compass.exceptions.COMPASSError + Raised if the underlying LLM caller propagates an execution + failure. + + Notes + ----- + Empty content returns ``False`` without invoking the LLM. """ if not content: return False @@ -154,20 +189,13 @@ def _parse_output(self, props): # noqa: PLR6301 class JurisdictionValidator: - """COMPASS Ordinance Jurisdiction validator - - Combines the logic of several validators into a single class. - - Purpose: - Determine whether a document pertains to a specific county. - Responsibilities: - 1. Use a combination of heuristics and LLM queries to determine - whether or not a document pertains to a particular county. - Key Relationships: - Uses a StructuredLLMCaller for LLM queries and delegates - sub-validation to - :class:`DTreeJurisdictionValidator`, - and :class:`DTreeURLJurisdictionValidator`. + """Coordinate URL and text jurisdiction validation for documents + + Notes + ----- + The validator stores the score threshold, optional text splitter, + and keyword arguments so they can be reused across many documents + without reconfiguration. """ def __init__(self, score_thresh=0.8, text_splitter=None, **kwargs): @@ -176,36 +204,58 @@ def __init__(self, score_thresh=0.8, text_splitter=None, **kwargs): Parameters ---------- score_thresh : float, optional - Score threshold to exceed when voting on content from raw - pages. By default, ``0.8``. - text_splitter : LCTextSplitter, optional - Optional text splitter instance to attach to doc (used for - splitting out pages in an HTML document). - By default, ``None``. + Threshold applied to the weighted page vote. Documents at or + above the threshold are considered jurisdiction matches. + Default is ``0.8``. + text_splitter : elm.web.text_splitter.LCTextSplitter, optional + Optional splitter attached to documents lacking a + ``text_splitter`` attribute so validators can iterate page + content consistently. Default is ``None``. **kwargs - Additional keyword arguments to pass to the - :class:`~compass.llm.calling.BaseLLMCaller` instance. + Additional keyword arguments forwarded to + :class:`~compass.llm.calling.BaseLLMCaller` and reused when + instantiating subordinate validators. """ self.score_thresh = score_thresh self.text_splitter = text_splitter self.kwargs = kwargs async def check(self, doc, jurisdiction): - """Check if the document belongs to the county + """Assess whether a document applies to the jurisdiction Parameters ---------- doc : elm.web.document.BaseDocument - Document instance. Should contain a "source" key in the - ``attrs`` that contains a URL (used for the URL validation - check). Raw content will be parsed for county name and - correct jurisdiction. + Document to evaluate. The validator expects + ``doc.raw_pages`` and, when available, a + ``doc.attrs['source']`` URL for supplemental URL validation. 
+ jurisdiction : compass.utilities.location.Jurisdiction + Target jurisdiction descriptor capturing the required + location attributes. Returns ------- bool - `True` if the doc contents pertain to the input county. - `False` otherwise. + ``True`` when either the URL or document text validation + confirms jurisdiction alignment, ``False`` otherwise. + + Raises + ------ + compass.exceptions.COMPASSError + Propagated if subordinate validators encounter LLM caller + errors. + + Notes + ----- + The method temporarily overrides ``doc.text_splitter`` when a + custom splitter is provided, ensuring the original splitter is + restored after validation completes. + + Examples + -------- + >>> validator = JurisdictionValidator() + >>> await validator.check(document, jurisdiction) + True """ if hasattr(doc, "text_splitter") and self.text_splitter is not None: old_splitter = doc.text_splitter @@ -244,7 +294,13 @@ async def _check(self, doc, jurisdiction): class JurisdictionWebsiteValidator: - """COMPASS Ordinance Jurisdiction Website validator""" + """Validate whether a website is the primary jurisdiction portal + + Notes + ----- + The validator stores the initialization arguments so they can be + reused across many documents without reconfiguration. + """ WEB_PAGE_CHECK_SYSTEM_MESSAGE = ( "You are an expert data analyst that examines website text to " @@ -261,36 +317,55 @@ def __init__( Parameters ---------- - browser_semaphore : :class:`asyncio.Semaphore`, optional - Semaphore instance that can be used to limit the number of - playwright browsers open concurrently. If ``None``, no - limits are applied. By default, ``None``. + browser_semaphore : asyncio.Semaphore, optional + Semaphore constraining concurrent Playwright usage. + ``None`` applies no concurrency limit. Default is ``None``. file_loader_kwargs : dict, optional - Dictionary of keyword arguments pairs to initialize - :class:`elm.web.file_loader.AsyncWebFileLoader`. - By default, ``None``. + Keyword arguments passed to + :class:`elm.web.file_loader.AsyncWebFileLoader`. Default is + ``None``. **kwargs - Additional keyword arguments to pass to the - :class:`~compass.llm.calling.BaseLLMCaller` instance. - + Additional keyword arguments cached for downstream LLM + calls triggered during validation. """ self.browser_semaphore = browser_semaphore self.file_loader_kwargs = file_loader_kwargs or {} self.kwargs = kwargs async def check(self, url, jurisdiction): - """Check if the website is the main website for a jurisdiction + """Determine whether a website serves as a jurisdiction's portal + + The validator first performs an inexpensive URL classification + before downloading page content. Only when the URL fails the + initial check does it fetch and inspect the page text using a + generic LLM caller. Parameters ---------- url : str - URL of the website to validate. + URL to inspect. Empty values return ``False`` immediately. + jurisdiction : compass.utilities.location.Jurisdiction + Target jurisdiction descriptor used to frame the validation + prompts. Returns ------- bool - ``True`` if the website is the main website for the given - jurisdiction; ``False`` otherwise. + ``True`` when either the URL quick check or the full page + evaluation indicates the site is the official main website + for the jurisdiction. + + Raises + ------ + compass.exceptions.COMPASSError + Propagated from :class:`BaseLLMCaller` if configured to + raise on LLM failures. 
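# --- Editor's illustrative sketch (not part of the patch) -------------------
# One way the validators documented above could be combined. The ``doc`` and
# ``jurisdiction`` arguments are assumed to be an elm document instance and a
# compass Jurisdiction instance supplied by the caller, and a configured LLM
# service is required for the calls to succeed.
from compass.validation.location import (
    JurisdictionValidator,
    JurisdictionWebsiteValidator,
)


async def doc_and_site_match(doc, jurisdiction, website_url):
    doc_ok = await JurisdictionValidator(score_thresh=0.8).check(
        doc, jurisdiction
    )
    site_ok = await JurisdictionWebsiteValidator().check(
        website_url, jurisdiction
    )
    return doc_ok and site_ok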
+ + Examples + -------- + >>> validator = JurisdictionWebsiteValidator() + >>> await validator.check("https://county.gov", jurisdiction) + True """ url_validator = DTreeURLJurisdictionValidator( diff --git a/compass/warn.py b/compass/warn.py index 89976aab7..677688dc4 100644 --- a/compass/warn.py +++ b/compass/warn.py @@ -10,7 +10,6 @@ class COMPASSWarning(UserWarning): """Generic COMPASS Warning""" def __init__(self, *args, **kwargs): - """Init exception and broadcast message to logger.""" super().__init__(*args, **kwargs) if args: logger.warning(str(args[0]), stacklevel=2) From f63e0dd7e134b9f14120f1fb9c0e67924c0b971c Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 14:02:16 -0700 Subject: [PATCH 2/4] Update copilot instructions --- .github/copilot-instructions.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index b3777f3ee..e54f15e37 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -90,8 +90,11 @@ Use `pixi add --feature python-dev ` to add a dependency that is only u - Never include a period (".") at the end of the first line of docstrings. - Do not add a short summary to __init__ methods. Instead, keep the line blank and start the "Parameters" section after a second newline. - Do not document parameters in the class docstring - do that in the __init__ docstring instead. -- All @property and @cached_property method documentation should be one line long and should start with the return type. -- "Protected" functions and methods should always be documented using only one-line summary docstrings. +- Do not add docstring to dunder methods (e.g., __str__, __repr__, etc.) unless absolutely necessary. +- All @property and @cached_property method documentation should be one line long and should start with the return type followed by a colon (e.g. `"""str: My string property"""`). +- If a parameter has a default value, always end the description with the sentence `"By default, ."` +- If the default value for a parameter is **not** `None`, document it using the format: `param_name : type, default=`. If the default value for a parameter **is** `None`, use the format : `param_name : type, optional`. +- "Protected" functions and methods (i.e. starting with an underscore) should always be documented using **only** one-line summary docstrings. ## 7. Coding Guidelines (Rust) - Workspace-managed deps; update root `Cargo.toml` if adding shared dependency. From 8509ccbb3b56a0a8d5fb24065be9ed284b998d35 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 14:18:29 -0700 Subject: [PATCH 3/4] Fix crosslinks --- compass/common/base.py | 7 ++++--- compass/extraction/apply.py | 6 +++--- compass/utilities/finalize.py | 3 +-- compass/utilities/parsing.py | 5 ++--- compass/validation/graphs.py | 10 +++++----- compass/validation/location.py | 15 ++++++++------- 6 files changed, 23 insertions(+), 23 deletions(-) diff --git a/compass/common/base.py b/compass/common/base.py index e7df1a974..5447032a4 100644 --- a/compass/common/base.py +++ b/compass/common/base.py @@ -164,9 +164,10 @@ async def run_async_tree(tree, response_as_json=True): ---------- tree : AsyncDecisionTree Decision tree to execute. - response_as_json : bool, optional - When ``True`` (default), attempts to parse the LLM response as - JSON using :func:`compass.utilities.llm_response_as_json`. 
+ response_as_json : bool, default=True + If ``True``, attempts to parse the LLM response as JSON using + :func:`compass.utilities.parsing.llm_response_as_json`. + By default, ``True``. Returns ------- diff --git a/compass/extraction/apply.py b/compass/extraction/apply.py index 590a371b1..e43008902 100644 --- a/compass/extraction/apply.py +++ b/compass/extraction/apply.py @@ -207,7 +207,7 @@ async def extract_ordinance_text_with_llm( Optional Langchain text splitter (or subclass instance), or any object that implements a `split_text` method. The method should take text as input (str) and return a list of text chunks. - extractor : compass.extraction.base.OrdinanceTextExtractor + extractor : object Extractor instance exposing ``parsers`` that consume text chunks and update ``doc.attrs``. original_text_key : str @@ -272,7 +272,7 @@ async def extract_ordinance_text_with_ngram_validation( Optional Langchain text splitter (or subclass instance), or any object that implements a `split_text` method. The method should take text as input (str) and return a list of text chunks. - extractor : compass.extraction.base.OrdinanceTextExtractor + extractor : object Extractor instance exposing ``parsers`` that consume text chunks and update ``doc.attrs``. original_text_key : str @@ -431,7 +431,7 @@ async def extract_ordinance_values(doc, parser, text_key, out_key): that are found to contain ordinance data. Note that if the document's attrs does not contain the `text_key` key, it will not be processed. - parser : compass.extraction.base.StructuredParser + parser : object Parser instance with an async ``parse`` method that converts cleaned ordinance text into structured values. text_key : str diff --git a/compass/utilities/finalize.py b/compass/utilities/finalize.py index be1a457b8..c3b9424b1 100644 --- a/compass/utilities/finalize.py +++ b/compass/utilities/finalize.py @@ -149,8 +149,7 @@ def doc_infos_to_db(doc_infos): Returns ------- pandas.DataFrame - Consolidated ordinance dataset containing the columns enumerated - in :data:`_PARSED_COLS`. + Consolidated ordinance dataset. int Number of jurisdictions contributing at least one ordinance to the consolidated dataset. diff --git a/compass/utilities/parsing.py b/compass/utilities/parsing.py index aac176754..f7def5d5c 100644 --- a/compass/utilities/parsing.py +++ b/compass/utilities/parsing.py @@ -242,9 +242,8 @@ def load_config(config_fp): Notes ----- - JSON5 loading is handled via :mod:`pyjson5`, enabling comments and - trailing commas, among other quality-of-life improvements over - standard JSON, which uses the built-in :func:`json.load`. + JSON5 enables comments and trailing commas, among other + quality-of-life improvements over vanilla JSON. """ config_fp = Path(config_fp) diff --git a/compass/validation/graphs.py b/compass/validation/graphs.py index 1bb9a1984..126bca8ba 100644 --- a/compass/validation/graphs.py +++ b/compass/validation/graphs.py @@ -14,9 +14,9 @@ def setup_graph_correct_document_type(**kwargs): ---------- **kwargs Additional keyword arguments forwarded to - :func:`compass.common.setup_graph_no_nodes`. The helper consumes - ``doc_is_from_ocr`` (default ``False``) to alter draft-detection - prompts for scanned documents. + :func:`compass.common.base.setup_graph_no_nodes`. The helper + consumes ``doc_is_from_ocr`` (default ``False``) to alter + draft-detection prompts for scanned documents. 
Returns ------- @@ -295,7 +295,7 @@ def setup_graph_correct_jurisdiction_type(jurisdiction, **kwargs): Target jurisdiction descriptor that guides prompt wording. **kwargs Additional keyword arguments forwarded to - :func:`compass.common.setup_graph_no_nodes` (for example, + :func:`compass.common.base.setup_graph_no_nodes` (for example, ``usage_tracker`` or ``llm_service`` identifiers). Returns @@ -525,7 +525,7 @@ def setup_graph_correct_jurisdiction_from_url(jurisdiction, **kwargs): phrases used in prompts. **kwargs Additional keyword arguments forwarded to - :func:`compass.common.setup_graph_no_nodes`. + :func:`compass.common.base.setup_graph_no_nodes`. Returns ------- diff --git a/compass/validation/location.py b/compass/validation/location.py index 6d07d7a7a..25d994ae6 100644 --- a/compass/validation/location.py +++ b/compass/validation/location.py @@ -71,14 +71,15 @@ async def check(self, url): ------ compass.exceptions.COMPASSError Propagated if underlying LLM interactions fail while the - caller has configured :class:`BaseLLMCaller` to raise. + caller has configured + :class:`~compass.llm.calling.BaseLLMCaller` to raise. Notes ----- The method delegates to an internal asynchronous decision tree - backed by :class:`ChatLLMCaller`. The validator aggregates - structured responses and only approves when each required - attribute matches the target jurisdiction. + backed by :class:`~compass.llm.calling.ChatLLMCaller`. The + validator aggregates structured responses and only approves when + each required attribute matches the target jurisdiction. """ if not url: return False @@ -207,7 +208,7 @@ def __init__(self, score_thresh=0.8, text_splitter=None, **kwargs): Threshold applied to the weighted page vote. Documents at or above the threshold are considered jurisdiction matches. Default is ``0.8``. - text_splitter : elm.web.text_splitter.LCTextSplitter, optional + text_splitter : LCTextSplitter, optional Optional splitter attached to documents lacking a ``text_splitter`` attribute so validators can iterate page content consistently. Default is ``None``. @@ -358,8 +359,8 @@ async def check(self, url, jurisdiction): Raises ------ compass.exceptions.COMPASSError - Propagated from :class:`BaseLLMCaller` if configured to - raise on LLM failures. + Propagated from :class:`~compass.llm.calling.BaseLLMCaller` + if configured to raise on LLM failures. Examples -------- From c77899049992c9d21c9502e343b7b65f99d6d5d5 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Thu, 20 Nov 2025 14:20:35 -0700 Subject: [PATCH 4/4] Implement PR review comments --- compass/pb.py | 4 ++-- compass/utilities/base.py | 8 ++++++++ compass/validation/graphs.py | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/compass/pb.py b/compass/pb.py index 146314a33..93a4e4704 100644 --- a/compass/pb.py +++ b/compass/pb.py @@ -182,10 +182,10 @@ def update_total_cost(self, cost, replace=False): Parameters ---------- - cost : float + cost : float or int Cost increment or replacement value in US dollars. replace : bool, optional - When ``True`` the total cost is replaced by ``cost`` , + When ``True`` the total cost is replaced by ``cost``, provided it does not move backwards. When ``False`` the cost is added cumulatively. By default, ``False``. 
""" diff --git a/compass/utilities/base.py b/compass/utilities/base.py index 4580b062b..d40393645 100644 --- a/compass/utilities/base.py +++ b/compass/utilities/base.py @@ -204,6 +204,14 @@ def __init__( ) def __iter__(self): + """Yield managed directory paths in canonical order + + Yields + ------ + pathlib.Path + Each of the managed directories in the following order: + out, logs, clean_files, ordinance_files, jurisdiction_dbs. + """ yield self.out yield self.logs yield self.clean_files diff --git a/compass/validation/graphs.py b/compass/validation/graphs.py index 126bca8ba..92d4ee05a 100644 --- a/compass/validation/graphs.py +++ b/compass/validation/graphs.py @@ -29,7 +29,7 @@ def setup_graph_correct_document_type(**kwargs): ----- The resulting graph encodes a structured sequence of Yes/No prompts that culminate in a JSON response containing summary metadata and a - legal-text boolean keyed by ``{key}``. + legal-text boolean. """ doc_is_from_ocr = kwargs.pop("doc_is_from_ocr", False)