7 changes: 5 additions & 2 deletions .github/copilot-instructions.md
@@ -90,8 +90,11 @@ Use `pixi add --feature python-dev <package>` to add a dependency that is only u
- Never include a period (".") at the end of the first line of docstrings.
- Do not add a short summary to __init__ methods. Instead, keep the line blank and start the "Parameters" section after a second newline.
- Do not document parameters in the class docstring - do that in the __init__ docstring instead.
- All @property and @cached_property method documentation should be one line long and should start with the return type.
- "Protected" functions and methods should always be documented using only one-line summary docstrings.
- Do not add docstrings to dunder methods (e.g., `__str__`, `__repr__`) unless absolutely necessary.
- All @property and @cached_property method documentation should be one line long and should start with the return type followed by a colon (e.g. `"""str: My string property"""`).
- If a parameter has a default value, always end the description with the sentence `"By default, <default value>."`
- If the default value for a parameter is **not** `None`, document it using the format: `param_name : type, default=<default value>`. If the default value for a parameter **is** `None`, use the format: `param_name : type, optional`.
- "Protected" functions and methods (i.e. starting with an underscore) should always be documented using **only** one-line summary docstrings.
- To exclude functions or classes from the public API documentation, start the docstring with the token ``[NOT PUBLIC API]``.
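For concreteness, here is a hypothetical class documented according to the rules above; every name in it is invented for this illustration and does not come from the COMPASS codebase:

```python
class ChunkScorer:
    """Score text chunks for ordinance relevance"""

    def __init__(self, threshold=0.5, model=None):
        """

        Parameters
        ----------
        threshold : float, default=0.5
            Minimum score a chunk must reach to be kept.
            By default, ``0.5``.
        model : str, optional
            Name of the scoring model to use. By default, ``None``.
        """
        self.threshold = threshold
        self.model = model

    @property
    def is_configured(self):
        """bool: Whether a scoring model has been configured"""
        return self.model is not None

    def _clip(self, score):
        """Clip a raw score to the ``[0, 1]`` range"""
        return min(max(score, 0.0), 1.0)
```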

## 7. Coding Guidelines (Rust)
2 changes: 1 addition & 1 deletion compass/_cli/main.py
@@ -11,7 +11,7 @@
@click.version_option(version=__version__)
@click.pass_context
def main(ctx):
"""Ordinance command line interface"""
"""COMPASS command line interface"""
ctx.ensure_object(dict)


71 changes: 67 additions & 4 deletions compass/common/base.py
@@ -133,15 +133,53 @@ def llm_response_does_not_start_with_no(response):
def setup_async_decision_tree(
graph_setup_func, usage_sub_label=None, **kwargs
):
"""Setup Async Decision tree for ordinance extraction"""
"""Setup an ``AsyncDecisionTree`` for ordinance extraction

Parameters
----------
graph_setup_func : callable
Factory that returns a fully configured
:class:`networkx.DiGraph`.
usage_sub_label : str, optional
Usage label reported to the LLM usage tracker. By default, ``None``.
**kwargs
Keyword arguments forwarded to ``graph_setup_func``.

Returns
-------
AsyncDecisionTree
Decision tree wrapping the graph produced by
``graph_setup_func``.

Notes
-----
The function asserts that the tree's chat history contains exactly
one message (the system prompt) before returning the constructed
wrapper.
"""
G = graph_setup_func(**kwargs) # noqa: N806
tree = AsyncDecisionTree(G, usage_sub_label=usage_sub_label)
assert len(tree.chat_llm_caller.messages) == 1
return tree
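A minimal usage sketch of this helper together with ``run_async_tree`` (defined just below); the graph factory name, usage label, and keyword arguments are assumptions for the sketch, not part of this diff:

```python
async def _extract_from_chunk(chunk):
    # Build the tree from a hypothetical graph factory that accepts a ``text`` kwarg
    tree = setup_async_decision_tree(
        setup_my_ordinance_graph,          # assumed factory returning a DiGraph
        usage_sub_label="ordinance_check",
        text=chunk,                        # forwarded to the factory via **kwargs
    )
    # Execute the tree; the response is parsed as JSON by default
    return await run_async_tree(tree)

# run with: asyncio.run(_extract_from_chunk("Wind turbines shall be set back ..."))
```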


async def run_async_tree(tree, response_as_json=True):
"""Run Async Decision Tree and return output as dict"""
"""Run an async decision tree and optionally parse JSON output

Parameters
----------
tree : AsyncDecisionTree
Decision tree to execute.
response_as_json : bool, default=True
If ``True``, attempts to parse the LLM response as JSON using
:func:`compass.utilities.parsing.llm_response_as_json`.
By default, ``True``.

Returns
-------
dict or str or None
Parsed dictionary when ``response_as_json`` is ``True``, raw
response otherwise. Returns ``None`` if execution fails.
"""
try:
response = await tree.async_run()
except COMPASSRuntimeError:
@@ -154,14 +192,39 @@ async def run_async_tree(tree, response_as_json=True):


async def run_async_tree_with_bm(tree, base_messages):
"""Run Async Decision Tree from base messages; return dict output"""
"""Run an async decision tree using seed "base" messages

Parameters
----------
tree : AsyncDecisionTree
Decision tree to execute.
base_messages : list of dict
Messages to preload into the tree's chat caller before running.

Returns
-------
dict or str or None
Output from :func:`run_async_tree`, called with the default
``response_as_json=True``.
"""
tree.chat_llm_caller.messages = base_messages
assert len(tree.chat_llm_caller.messages) == len(base_messages)
return await run_async_tree(tree)
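A short sketch of the "base messages" variant; the dicts follow the role/content shape used by the chat caller (see ``all_messages_txt`` in ``tree.py`` below), and the message text is invented:

```python
# Inside an async coroutine, with `tree` already built via setup_async_decision_tree
base_messages = [
    {"role": "system", "content": "You extract ordinance requirements."},
    {"role": "user", "content": "Ordinance text: ..."},
]
result = await run_async_tree_with_bm(tree, base_messages)  # JSON-parsed by default
```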


def empty_output(feature):
"""Empty output for a feature (not found in text)"""
"""Return the default empty result for a missing feature

Parameters
----------
feature : str
Name of the feature to seed in the empty output structure.

Returns
-------
list of dict
Empty result placeholders used by downstream extraction logic.
"""
if feature in {"structures", "property line"}:
return [
{"feature": f"{feature} (participating)"},
24 changes: 10 additions & 14 deletions compass/common/tree.py
@@ -66,32 +66,22 @@ def __init__(self, graph, usage_sub_label=None):

@property
def chat_llm_caller(self):
"""ChatLLMCaller: ChatLLMCaller instance for this tree"""
"""ChatLLMCaller: LLM caller bound to the decision tree"""
return self.graph.graph["chat_llm_caller"]

@cached_property
def tree_name(self):
"""str: Name of the decision tree"""
"""str: Configured decision tree name"""
return self._g.graph.get("_d_tree_name", "Unknown decision tree")

@property
def messages(self):
"""Get a list of the conversation messages with the LLM

Returns
-------
list
"""
"""list: Conversation messages exchanged with the LLM"""
return self.chat_llm_caller.messages

@property
def all_messages_txt(self):
"""Get a printout of the full conversation with the LLM

Returns
-------
str
"""
"""str: Formatted conversation transcript"""
messages = [
f"{msg['role'].upper()}: {msg['content']}" for msg in self.messages
]
@@ -140,6 +130,12 @@ async def async_run(self, node0="init"):
out : str or None
Final response from LLM at the leaf node or ``None`` if an
``AttributeError`` was raised during execution.

Raises
------
compass.exceptions.COMPASSRuntimeError
Raised when the traversal encounters an unexpected
exception that is not an ``AttributeError``.
"""

self._history = []
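A sketch of how a caller might consume the properties and the ``Raises`` behavior documented above; ``tree`` is assumed to be a fully configured ``AsyncDecisionTree`` and ``logger`` an ordinary module-level logger:

```python
from compass.exceptions import COMPASSRuntimeError

# Inside an async coroutine
try:
    answer = await tree.async_run()   # may be None if an AttributeError occurred
except COMPASSRuntimeError:
    logger.warning("Decision tree %r did not reach a leaf node", tree.tree_name)
    answer = None

logger.debug("Full conversation:\n%s", tree.all_messages_txt)
```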
1 change: 0 additions & 1 deletion compass/exceptions.py
@@ -10,7 +10,6 @@ class COMPASSError(Exception):
"""Generic COMPASS Error"""

def __init__(self, *args, **kwargs):
"""Init exception and broadcast message to logger"""
super().__init__(*args, **kwargs)
if args:
logger.error(
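The ``__init__`` docstring removed here described behavior the code still has: the message passed to any ``COMPASSError`` is broadcast to the module logger when the exception is constructed. A tiny illustration with an invented message:

```python
# Constructing (or raising) the exception logs the message via logger.error(...)
raise COMPASSError("Could not locate ordinance text in document")
```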
45 changes: 39 additions & 6 deletions compass/extraction/apply.py
@@ -37,13 +37,20 @@ async def check_for_ordinance_info(
has the ``"contains_ord_info"`` key, it will not be processed.
To force a document to be processed by this function, remove
that key from the document's ``attrs``.
model_config : compass.llm.config.LLMConfig
Configuration describing which LLM service, splitter, and call
parameters should be used for extraction.
heuristic : object
Domain-specific heuristic implementing a ``check`` method to
qualify text chunks for further processing.
tech : str
Technology of interest (e.g., "solar", "wind"). This is
used to set up some document validation decision trees.
text_splitter : LCTextSplitter, optional
Optional Langchain text splitter (or subclass instance), or any
object that implements a `split_text` method. The method should
take text as input (str) and return a list of text chunks.
ordinance_text_collector_class : type
Collector class invoked to capture ordinance text chunks.
permitted_use_text_collector_class : type, optional
Collector class used to capture permitted-use districts text.
When ``None``, the permitted-use workflow is skipped.
usage_tracker : UsageTracker, optional
Optional tracker instance to monitor token usage during
LLM calls. By default, ``None``.
@@ -61,6 +68,12 @@
and an ``"ordinance_text"`` key containing the ordinance text
snippet. Note that the snippet may contain other info as well,
but should encapsulate all of the ordinance text.

Notes
-----
The function updates progress bar logging as chunks are processed
and sets ``contains_district_info`` when
``permitted_use_text_collector_class`` is provided.
"""
if "contains_ord_info" in doc.attrs:
return doc
@@ -134,6 +147,9 @@ async def extract_date(doc, model_config, usage_tracker=None):
----------
doc : elm.web.document.BaseDocument
A document potentially containing date information.
model_config : compass.llm.config.LLMConfig
Configuration describing which LLM service, splitter, and call
parameters should be used for date extraction.
usage_tracker : UsageTracker, optional
Optional tracker instance to monitor token usage during
LLM calls. By default, ``None``.
@@ -145,6 +161,11 @@
the parsing are stored in the document's attrs. In particular,
the attrs will contain a ``"date"`` key that will contain the
parsed date information.

Notes
-----
Documents already containing a ``"date"`` attribute are returned
without reprocessing.
"""
if "date" in doc.attrs:
logger.debug(
@@ -186,8 +207,9 @@
Optional Langchain text splitter (or subclass instance), or any
object that implements a `split_text` method. The method should
take text as input (str) and return a list of text chunks.
extractor : WindOrdinanceTextExtractor
Object used for ordinance text extraction.
extractor : object
Extractor instance exposing ``parsers`` that consume text
chunks and update ``doc.attrs``.
original_text_key : str
String corresponding to the `doc.attrs` key containing the
original text (before extraction).
@@ -250,6 +272,9 @@ async def extract_ordinance_text_with_ngram_validation(
Optional Langchain text splitter (or subclass instance), or any
object that implements a `split_text` method. The method should
take text as input (str) and return a list of text chunks.
extractor : object
Extractor instance exposing ``parsers`` that consume text
chunks and update ``doc.attrs``.
original_text_key : str
String corresponding to the `doc.attrs` key containing the
original text (before extraction).
@@ -406,6 +431,9 @@ async def extract_ordinance_values(doc, parser, text_key, out_key):
that are found to contain ordinance data. Note that if the
document's attrs does not contain the `text_key` key, it will
not be processed.
parser : object
Parser instance with an async ``parse`` method that converts
cleaned ordinance text into structured values.
text_key : str
Name of the key under which cleaned text is stored in
`doc.attrs`. This text should be ready for extraction.
@@ -418,6 +446,11 @@
elm.web.document.BaseDocument
Document that has been parsed for ordinance values. The results
of the extraction are stored in the document's attrs.

Notes
-----
When the cleaned text is missing or empty, the function emits a
:class:`compass.warn.COMPASSWarning` and leaves ``doc`` unchanged.
"""
if not doc.attrs.get(text_key):
msg = (
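Taken together, the ``apply.py`` helpers form a document-processing pipeline. The sketch below shows one plausible way to chain them; the collector class, attrs key names, and exact argument lists are assumptions pieced together from the Parameters sections above, not verbatim from the codebase:

```python
async def process_document(doc, model_config, heuristic, extractor, parser):
    # Step 1: flag ordinance content (records "contains_ord_info" in doc.attrs)
    doc = await check_for_ordinance_info(
        doc, model_config, heuristic, tech="wind",
        ordinance_text_collector_class=OrdinanceTextCollector,  # assumed class
    )
    if not doc.attrs.get("contains_ord_info"):
        return doc  # nothing more to do for this document

    # Step 2: record the effective date under doc.attrs["date"]
    doc = await extract_date(doc, model_config)

    # Step 3: distill the ordinance text, then parse structured values from it
    doc = await extract_ordinance_text_with_ngram_validation(
        doc, model_config, extractor, original_text_key="ordinance_text",
    )
    return await extract_ordinance_values(
        doc, parser, text_key="cleaned_ordinance_text",  # assumed attrs keys
        out_key="ordinance_values",
    )
```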