diff --git a/.gitignore b/.gitignore index 25e3f8c03..ead9de5e1 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,4 @@ __pypackages__/ # Added by cargo /target +examples/ diff --git a/compass/extraction/geothermal_electricity/__init__.py b/compass/extraction/geothermal_electricity/__init__.py new file mode 100644 index 000000000..540578c45 --- /dev/null +++ b/compass/extraction/geothermal_electricity/__init__.py @@ -0,0 +1,38 @@ +"""Geothermal electricity generation ordinance extraction utilities""" + +from .ordinance import ( + GeothermalElectricityHeuristic, + GeothermalElectricityOrdinanceTextCollector, + GeothermalElectricityOrdinanceTextExtractor, + GeothermalElectricityPermittedUseDistrictsTextCollector, + GeothermalElectricityPermittedUseDistrictsTextExtractor, +) + + +GEOTHERMAL_ELECTRICITY_QUESTION_TEMPLATES = [ + "filetype:pdf {jurisdiction} geothermal power plant ordinance", + "geothermal electricity generation ordinance {jurisdiction}", + "{jurisdiction} geothermal energy facility zoning ordinance", + ( + "Where can I find the legal text for geothermal power plant " + "zoning ordinances in {jurisdiction}?" + ), + ( + "What is the specific legal information regarding zoning " + "ordinances for geothermal electricity generation facilities in " + "{jurisdiction}?" 
+ ), +] + +BEST_GEOTHERMAL_ELECTRICITY_ORDINANCE_WEBSITE_URL_KEYWORDS = { + "pdf": 92160, + "geothermal": 46080, + "ordinance": 23040, + "zoning": 11520, + "regulation": 5760, + "code": 2880, + "power": 1440, + "electricity": 1440, + "planning": 720, + "government": 180, +} diff --git a/compass/extraction/geothermal_electricity/ordinance.py b/compass/extraction/geothermal_electricity/ordinance.py new file mode 100644 index 000000000..41f7ca90c --- /dev/null +++ b/compass/extraction/geothermal_electricity/ordinance.py @@ -0,0 +1,644 @@ +"""Geothermal electricity ordinance content collection + +These methods help filter down the document text to only the portions +relevant to utility-scale geothermal electricity generation ordinances. +""" + +import logging + +from compass.common import BaseTextExtractor +from compass.validation.content import Heuristic +from compass.llm.calling import StructuredLLMCaller +from compass.utilities.enums import LLMUsageCategory +from compass.utilities.parsing import merge_overlapping_texts + + +logger = logging.getLogger(__name__) + + +_LARGE_GEOTHERMAL_SYNONYMS = ( + "geothermal power plants, geothermal electricity generation facilities, " + "geothermal energy facilities, utility-scale geothermal systems, " + "commercial geothermal energy systems, geothermal generating facilities, " + "geothermal production facilities, or similar" +) +_SEARCH_TERMS_AND = ( + "zoning, siting, setback, system design, and operational " + "requirements/restrictions" +) +_SEARCH_TERMS_OR = _SEARCH_TERMS_AND.replace("and", "or") +_IGNORE_TYPES = ( + "geothermal heat pumps, ground-source heat pumps, residential geothermal, " + "HVAC systems, ground-coupled systems, geoexchange systems" +) + + +class GeothermalElectricityHeuristic(Heuristic): + """Perform a heuristic check for geothermal power plants in text""" + + NOT_TECH_WORDS = [ + "geothermal heat pump", + "ground source heat pump", + "ground-source heat pump", + "ghp", + "ground heat pump", + "gshp", + 
"ground-coupled heat pump", + "ground coupled heat pump", + "earth-coupled heat pump", + "earth-source heat pump", + "geoexchange", + "geo-exchange", + "closed loop", + "closed-loop", + "open loop", + "vertical loop", + "horizontal loop", + "heating and cooling", + "hvac", + "space heating", + "water heating", + "direct use", + "direct-use", + "district heating", + "greenhouse heating", + "residential geothermal", + "accessory use", + "energy star", + ] + """Words/phrases: text is NOT about geothermal power plants""" + GOOD_TECH_KEYWORDS = [ + "geothermal", + "setback", + "turbine", + "generator", + "wellfield", + ] + """Words that indicate we should keep a chunk for analysis""" + GOOD_TECH_ACRONYMS = [] + """Acronyms for geothermal power plants that we want to capture""" + GOOD_TECH_PHRASES = [ + "turbine", + "generator", + "wellfield", + "well field", + "injection", + "production well", + "generating capacity", + "power production", + "electricity generation", + "megawatt", + "kilowatt", + "gigawatt", + "transmission line", + "substation", + "power plant", + "cooling tower", + "condenser", + "separator", + "switchyard", + "transformer", + "baseload", + "geothermal exploration", + "geothermal generating", + "geothermal generation", + "geothermal power", + "geothermal production", + "geothermal project", + "geothermal overlay zone", + "geothermal power plant", + "geothermal facility", + "geothermal electric", + "geothermal energy facility", + "steam turbine", + "binary cycle", + "flash steam", + "dry steam", + "enhanced geothermal", + "reservoir temperature", + "brine", + "reinjection well", + "production zone", + "geothermal resource", + "thermal power", + "renewable energy generation", + "baseload power", + "geothermal production project", + "geothermal drilling", + "exploratory well", + "injection well", + "geothermal lease", + "drilling permit", + "plan of operation", + "plan of utilization", + "known geothermal resource", + "utility scale", + "commercial 
renewable", + "geothermal development", + ] + """Phrases that indicate text is about geothermal power plants""" + + +class GeothermalElectricityOrdinanceTextCollector(StructuredLLMCaller): + """Check text chunks for ordinances and collect them if they do""" + + CONTAINS_ORD_PROMPT = ( + "You extract structured data from text. Return your answer in JSON " + "format (not markdown). Your JSON file must include exactly two " + "keys. The first key is 'geothermal_reqs', which is a string that " + f"summarizes all {_SEARCH_TERMS_AND} that are explicitly enacted " + "in the legal text for geothermal electricity generation " + "facilities for a given jurisdiction. " + "Note that geothermal power plant bans are an important " + "restriction to track. " + "Include any **closely related provisions** if they clearly " + "pertain to the **development, operation, modification, or " + "removal** of geothermal electricity generation facilities (or " + "geothermal power plants). " + "All restrictions should be enforceable - ignore any text that " + "only provides a legal definition of the regulation. If the text " + f"does not specify any concrete {_SEARCH_TERMS_OR} for a " + "geothermal electricity generation facility, set this key to " + "`null`. The last key is '{key}', which is a boolean that is set " + "to True if the text excerpt explicitly details " + f"{_SEARCH_TERMS_OR} for a geothermal electricity generation " + "facility and False otherwise." + ) + """Prompt to check if chunk contains geothermal ordinance info""" + + IS_UTILITY_SCALE_PROMPT = ( + "You are a legal scholar that reads ordinance text and determines " + f"whether it applies to {_SEARCH_TERMS_OR} for **utility-scale " + "geothermal electricity generation facilities**. Utility-scale " + "geothermal electricity generation facilities may " + f"also be referred to as {_LARGE_GEOTHERMAL_SYNONYMS}. 
" + "Your client is a geothermal power plant developer that does not " + f"care about ordinances related to {_IGNORE_TYPES}. " + "Ignore any text related to such systems. " + "Return your answer as a dictionary in JSON format (not markdown). " + "Your JSON file must include exactly two keys. The first key is " + "'summary' which contains a string that summarizes the types of " + "geothermal systems the text applies to (if any). The second key " + "is '{key}', which is a boolean that is set to True if any part of " + f"the text excerpt details {_SEARCH_TERMS_OR} for the **utility-scale " + "geothermal electricity generation facilities** (or similar) that the " + "client is interested in and False otherwise." + ) + """Prompt to check if chunk is for utility-scale geothermal""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._ordinance_chunks = {} + + async def check_chunk(self, chunk_parser, ind): + """Check a chunk at a given ind to see if it contains ordinance + + Parameters + ---------- + chunk_parser : ParseChunksWithMemory + Instance that contains a ``parse_from_ind`` method. + ind : int + Index of the chunk to check. + + Returns + ------- + bool + Boolean flag indicating whether or not the text in the chunk + contains large solar energy farm ordinance text. 
+ """ + contains_ord_info = await chunk_parser.parse_from_ind( + ind, + key="contains_ord_info", + llm_call_callback=self._check_chunk_contains_ord, + ) + if not contains_ord_info: + logger.debug("Text at ind %d does not contain ordinance info", ind) + return False + + logger.debug("Text at ind %d does contain ordinance info", ind) + + is_utility_scale = await chunk_parser.parse_from_ind( + ind, + key="x", + llm_call_callback=self._check_chunk_is_for_utility_scale, + ) + if not is_utility_scale: + logger.debug("Text at ind %d is not for utility-scale SEF", ind) + return False + + logger.debug("Text at ind %d is for utility-scale SEF", ind) + + _store_chunk(chunk_parser, ind, self._ordinance_chunks) + logger.debug("Added text at ind %d to ordinances", ind) + + return True + + @property + def contains_ord_info(self): + """bool: Flag indicating whether text contains ordinance info""" + return bool(self._ordinance_chunks) + + @property + def ordinance_text(self): + """str: Combined ordinance text from the individual chunks""" + logger.debug( + "Grabbing %d ordinance chunk(s) from original text at these " + "indices: %s", + len(self._ordinance_chunks), + list(self._ordinance_chunks), + ) + + text = [ + self._ordinance_chunks[ind] + for ind in sorted(self._ordinance_chunks) + ] + return merge_overlapping_texts(text) + + async def _check_chunk_contains_ord(self, key, text_chunk): + """Call LLM on a chunk of text to check for ordinance""" + content = await self.call( + sys_msg=self.CONTAINS_ORD_PROMPT.format(key=key), + content=text_chunk, + usage_sub_label=(LLMUsageCategory.DOCUMENT_CONTENT_VALIDATION), + ) + logger.debug("LLM response: %s", content) + return content.get(key, False) + + async def _check_chunk_is_for_utility_scale(self, key, text_chunk): + """Call LLM on a chunk of text to check for utility scale""" + content = await self.call( + sys_msg=self.IS_UTILITY_SCALE_PROMPT.format(key=key), + content=text_chunk, + 
usage_sub_label=(LLMUsageCategory.DOCUMENT_CONTENT_VALIDATION), + ) + logger.debug("LLM response: %s", content) + return content.get(key, False) + + +class GeothermalElectricityPermittedUseDistrictsTextCollector( + StructuredLLMCaller +): + """Check text chunks for permitted geothermal districts""" + + DISTRICT_PROMPT = ( + "You are a legal scholar that reads ordinance text and determines " + "whether it explicitly contains relevant information to determine the " + "districts (and especially the district names) where " + "utility-scale geothermal electricity generation facilities are a " + "permitted use (primary, special, accessory, or otherwise), as " + "well as the districts where geothermal power plants are " + "prohibited entirely. Utility-scale " + "geothermal electricity generation facilities may also " + f"be referred to as {_LARGE_GEOTHERMAL_SYNONYMS}. " + "Do not make any inferences; only answer based on information that " + "is explicitly stated in the text. " + "Note that relevant information may sometimes be found in tables. " + "Return your answer as a dictionary in JSON format (not markdown). " + "Your JSON file must include exactly two keys. The first key is " + "'districts' which contains a string that lists all of the district " + "names for which the text explicitly permits **geothermal power " + "plants** (if any). The last key is '{key}', which is a boolean that " + "is set to True if any part of the text excerpt provides information " + "on districts where **geothermal power plants** (or similar) are a " + "permitted use and False otherwise."
+ ) + """Prompt to check if chunk contains info on permitted districts""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._district_chunks = {} + + async def check_chunk(self, chunk_parser, ind): + """Check a chunk to see if it contains permitted uses + + Parameters + ---------- + chunk_parser : ParseChunksWithMemory + Instance that contains a ``parse_from_ind`` method. + ind : int + Index of the chunk to check. + + Returns + ------- + bool + Boolean flag indicating whether or not the text in the chunk + contains geothermal power plant permitted use text. + """ + + key = "contains_district_info" + content = await self.call( + sys_msg=self.DISTRICT_PROMPT.format(key=key), + content=chunk_parser.text_chunks[ind], + usage_sub_label=( + LLMUsageCategory.DOCUMENT_PERMITTED_USE_CONTENT_VALIDATION + ), + ) + logger.debug("LLM response: %s", content) + contains_district_info = content.get(key, False) + + if contains_district_info: + _store_chunk(chunk_parser, ind, self._district_chunks) + logger.debug("Text at ind %d contains district info", ind) + return True + + logger.debug("Text at ind %d does not contain district info", ind) + return False + + @property + def contains_district_info(self): + """bool: Flag indicating whether text contains district info""" + return bool(self._district_chunks) + + @property + def permitted_use_district_text(self): + """str: Combined permitted use districts text from the chunks""" + logger.debug( + "Grabbing %d permitted use chunk(s) from original text at these " + "indices: %s", + len(self._district_chunks), + list(self._district_chunks), + ) + + text = [ + self._district_chunks[ind] for ind in sorted(self._district_chunks) + ] + return merge_overlapping_texts(text) + + +class GeothermalElectricityOrdinanceTextExtractor(BaseTextExtractor): + """Extract succinct ordinance text from input + + Purpose: + Extract relevant ordinance text from document. + Responsibilities: + 1.
Extract portions from chunked document text relevant to + particular ordinance type (e.g. geothermal electricity + generation zoning for utility-scale systems). + Key Relationships: + Uses a StructuredLLMCaller for LLM queries. + """ + + GEOTHERMAL_ENERGY_SYSTEM_FILTER_PROMPT = ( + "# CONTEXT #\n" + "We want to reduce the provided excerpt to only contain information " + "about **geothermal electricity generation facilities**. The " + "extracted text will be used for structured data extraction, so it " + "must be both **comprehensive** (retaining all relevant details) and " + "**focused** (excluding unrelated content), with **zero rewriting or " + "paraphrasing**. Ensure that all retained information is **directly " + "applicable to geothermal electricity generation facilities** while " + "preserving full context and accuracy.\n" + "\n# OBJECTIVE #\n" + "Extract all text **pertaining to geothermal electricity generation " + "facilities** from the provided excerpt.\n" + "\n# RESPONSE #\n" + "Follow these guidelines carefully:\n" + "\n1. ## Scope of Extraction ##:\n" + "- Include **all** text that pertains to **geothermal " + "electricity generation facilities**, even if they are referred " + "to by different names such as:\n" + f"\t{_LARGE_GEOTHERMAL_SYNONYMS.capitalize()}.\n" + "- Explicitly include any text related to **bans or " + "prohibitions** on geothermal electricity generation " + "facilities.\n" + "- Explicitly include any text related to the adoption or " + "enactment date of the ordinance (if any).\n" + "\n2. ## Exclusions ##:\n" + "- Do **not** include text that does not pertain to geothermal " + "electricity generation facilities.\n" + "\n3. ## Formatting & Structure ##:\n" + "- **Preserve _all_ section titles, headers, and numberings** for " + "reference.\n" + "- **Maintain the original wording, formatting, and structure** to " + "ensure accuracy.\n" + "\n4. 
## Output Handling ##:\n" + "- This is a strict extraction task — act like a text filter, **not** " + "a summarizer or writer.\n" + "- Do not add, explain, reword, or summarize anything.\n" + "- The output must be a **copy-paste** of the original excerpt.\n" + "**Absolutely no paraphrasing or rewriting.**\n" + "- The output must consist **only** of contiguous or discontiguous " + "verbatim blocks copied from the input.\n" + "- If **no relevant text** is found, return the response: " + "'No relevant text.'" + ) + """Prompt to extract ordinance text for geothermal electricity""" + + async def extract_geothermal_energy_system_section(self, text_chunks): + """Extract ordinance text from input text chunks for geothermal + + Parameters + ---------- + text_chunks : list of str + List of strings, each of which represent a chunk of text. + The order of the strings should be the order of the text + chunks. + + Returns + ------- + str + Ordinance text extracted from text chunks. + """ + return await self._process( + text_chunks=text_chunks, + instructions=self.GEOTHERMAL_ENERGY_SYSTEM_FILTER_PROMPT, + is_valid_chunk=_valid_chunk, + ) + + @property + def parsers(self): + """Iterable of parsers provided by this extractor + + Yields + ------ + name : str + Name describing the type of text output by the parser. + parser : callable + Async function that takes a ``text_chunks`` input and + outputs parsed text. + """ + yield ( + "cleaned_ordinance_text", + self.extract_geothermal_energy_system_section, + ) + + +class GeothermalElectricityPermittedUseDistrictsTextExtractor( + BaseTextExtractor +): + """Extract succinct ordinance text from input + + Purpose: + Extract relevant ordinance text from document. + Responsibilities: + 1. Extract portions from chunked document text relevant to + particular ordinance type (e.g. geothermal electricity + generation zoning for utility-scale systems). + Key Relationships: + Uses a StructuredLLMCaller for LLM queries. 
+ """ + + _USAGE_LABEL = LLMUsageCategory.DOCUMENT_PERMITTED_USE_DISTRICTS_SUMMARY + + PERMITTED_USES_FILTER_PROMPT = ( + "# CONTEXT #\n" + "We want to reduce the provided excerpt to only contain information " + "detailing permitted use(s) for a district. The extracted text will " + "be used for structured data extraction, so it must be both " + "**comprehensive** (retaining all relevant details) and **focused** " + "(excluding unrelated content), with **zero rewriting or " + "paraphrasing**. Ensure that all retained information " + "is **directly applicable** to permitted use(s) for one or more " + "districts while preserving full context and accuracy.\n" + "\n# OBJECTIVE #\n" + "Remove all text **not directly pertinent** to permitted use(s) for " + "a district.\n" + "\n# RESPONSE #\n" + "Follow these guidelines carefully:\n" + "\n1. ## Scope of Extraction ##:\n" + "- Retain all text defining permitted use(s) for a district, " + "including:\n" + "\t- **Primary, Special, Conditional, Accessory, Prohibited, and " + "any other use types.**\n" + "\t- **District names and zoning classifications.**\n" + "- Pay extra attention to any references to **geothermal " + "electricity generation facilities** or related terms.\n" + "- Ensure that **tables, lists, and structured elements** are " + "preserved as they may contain relevant details.\n" + "\n2. ## Exclusions ##:\n" + "- Do **not** include unrelated regulations, procedural details, " + "or non-use-based restrictions.\n" + "\n3. ## Formatting & Structure ##:\n" + "- **Preserve _all_ section titles, headers, and numberings** for " + "reference, **especially if they contain the district name**.\n" + "- **Maintain the original wording, formatting, and structure** to " + "ensure accuracy.\n" + "\n4.
## Output Handling ##:\n" + "- This is a strict extraction task — act like a text filter, **not** " + "a summarizer or writer.\n" + "- Do not add, explain, reword, or summarize anything.\n" + "- The output must be a **copy-paste** of the original excerpt.\n" + "**Absolutely no paraphrasing or rewriting.**\n" + "- The output must consist **only** of contiguous or discontiguous " + "verbatim blocks copied from the input.\n" + "- If **no relevant text** is found, return the response: " + "'No relevant text.'" + ) + """Prompt to extract ordinance text for permitted uses""" + + SEF_PERMITTED_USES_FILTER_PROMPT = ( + "# CONTEXT #\n" + "We want to reduce the provided excerpt to only contain information " + "detailing **geothermal power plant** permitted use(s) for a " + "district. The extracted text will be used for structured data " + "extraction, so it must be both **comprehensive** (retaining all " + "relevant details) and **focused** (excluding unrelated content), " + "with **zero rewriting or paraphrasing**. Ensure that all " + "retained information is **directly applicable** to permitted use(s) " + "for geothermal power plants in one or more districts while " + "preserving full context and accuracy.\n" + "\n# OBJECTIVE #\n" + "Remove all text **not directly pertinent** to geothermal power " + "plant permitted use(s) for a district.\n" + "\n# RESPONSE #\n" + "Follow these guidelines carefully:\n" + "\n1. ## Scope of Extraction ##:\n" + "- Retain all text defining permitted use(s) for a district, " + "including:\n" + "\t- **Primary, Special, Conditional, Accessory, Prohibited, and " + "any other use types.**\n" + "\t- **District names and zoning classifications.**\n" + "- Ensure that **tables, lists, and structured elements** are " + "preserved as they may contain relevant details.\n" + "\n2. ## Exclusions ##:\n" + "- Do not include text that does not pertain at all to " + "geothermal power plants.\n" + "\n3.
## Formatting & Structure ##:\n" + "- **Preserve _all_ section titles, headers, and numberings** for " + "reference, **especially if they contain the district name**.\n" + "- **Maintain the original wording, formatting, and structure** to " + "ensure accuracy.\n" + "\n4. ## Output Handling ##:\n" + "- This is a strict extraction task — act like a text filter, **not** " + "a summarizer or writer.\n" + "- Do not add, explain, reword, or summarize anything.\n" + "- The output must be a **copy-paste** of the original excerpt.\n" + "**Absolutely no paraphrasing or rewriting.**\n" + "- The output must consist **only** of contiguous or discontiguous " + "verbatim blocks copied from the input.\n" + "- If **no relevant text** is found, return the response: " + "'No relevant text.'" + ) + """Prompt to extract ordinance text for permitted uses for SEF""" + + async def extract_permitted_uses(self, text_chunks): + """Extract permitted uses text from input text chunks + + Parameters + ---------- + text_chunks : list of str + List of strings, each of which represent a chunk of text. + The order of the strings should be the order of the text + chunks. + + Returns + ------- + str + Ordinance text extracted from text chunks. + """ + return await self._process( + text_chunks=text_chunks, + instructions=self.PERMITTED_USES_FILTER_PROMPT, + is_valid_chunk=_valid_chunk, + ) + + async def extract_sef_permitted_uses(self, text_chunks): + """Extract permitted uses text for large SEF from input text + + Parameters + ---------- + text_chunks : list of str + List of strings, each of which represent a chunk of text. + The order of the strings should be the order of the text + chunks. + + Returns + ------- + str + Ordinance text extracted from text chunks. 
+ """ + return await self._process( + text_chunks=text_chunks, + instructions=self.SEF_PERMITTED_USES_FILTER_PROMPT, + is_valid_chunk=_valid_chunk, + ) + + @property + def parsers(self): + """Iterable of parsers provided by this extractor + + Yields + ------ + name : str + Name describing the type of text output by the parser. + parser : callable + Async function that takes a ``text_chunks`` input and + outputs parsed text. + """ + yield "permitted_use_only_text", self.extract_permitted_uses + yield "districts_text", self.extract_sef_permitted_uses + + +def _valid_chunk(chunk): + """True if chunk has content""" + return chunk and "no relevant text" not in chunk.lower() + + +def _store_chunk(parser, chunk_ind, store): + """Store chunk and its neighbors if it is not already stored""" + for offset in range(1 - parser.num_to_recall, 2): + ind_to_grab = chunk_ind + offset + if ind_to_grab < 0 or ind_to_grab >= len(parser.text_chunks): + continue + + store.setdefault(ind_to_grab, parser.text_chunks[ind_to_grab]) diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index be961bcba..000000000 --- a/examples/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# INFRA-COMPASS Examples and Tutorials - -This directory contains several examples/tutorials to help you get started with ``COMPASS``. - -- [`Configuration and Execution Basics`](./execution_basics): Learn how to configure and execute - a ``COMPASS`` run, starting with basic inputs and working your way up to a full configuration file. -- [`Quickstart Demo`](./openai_solar_demo): Small demo designed to get you running ``COMPASS`` - as fast as possible. Requires a personal OpenAI API key. -- [`Parse a Local PDF File`](./parse_existing_docs): A simple example of parsing a local PDF - file for structured solar ordinance values. - -For rendered versions of these guides, see the -[online documentation](https://nrel.github.io/COMPASS/examples/index.html). 
diff --git a/pixi.lock b/pixi.lock index 55b4dd8c8..19c58d6d2 100644 --- a/pixi.lock +++ b/pixi.lock @@ -6,8 +6,6 @@ environments: - url: https://conda.anaconda.org/microsoft/ indexes: - https://pypi.org/simple - options: - pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -1498,8 +1496,6 @@ environments: - url: https://conda.anaconda.org/microsoft/ indexes: - https://pypi.org/simple - options: - pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -3076,8 +3072,6 @@ environments: - url: https://conda.anaconda.org/microsoft/ indexes: - https://pypi.org/simple - options: - pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -5385,8 +5379,6 @@ environments: - url: https://conda.anaconda.org/microsoft/ indexes: - https://pypi.org/simple - options: - pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -6957,8 +6949,6 @@ environments: - url: https://conda.anaconda.org/microsoft/ indexes: - https://pypi.org/simple - options: - pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -8543,8 +8533,6 @@ environments: - url: https://conda.anaconda.org/microsoft/ indexes: - https://pypi.org/simple - options: - pypi-prerelease-mode: if-necessary-or-explicit packages: linux-64: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 @@ -24485,7 +24473,7 @@ packages: timestamp: 1733408419340 - pypi: ./ name: nrel-compass - version: 0.11.3.dev42+g0555ca3.d20260114 + version: 
0.12.1.dev3+g63e1ded.d20260115 sha256: 382673f4a05238e824916000e06e5d9e2b4a35d076f8c9a46eec5ee1d31dec06 requires_dist: - beautifulsoup4>=4.12.3,<5 @@ -24530,6 +24518,7 @@ packages: - pkginfo>=1.12.1.2,<2 ; extra == 'build' - twine>=6.1.0,<7 ; extra == 'build' requires_python: '>=3.12' + editable: true - pypi: https://files.pythonhosted.org/packages/b5/0e/0898bc80431eb17e7d0744321fab1ba1e39fae0ba2c9e8634c042feb31f5/nrel_elm-0.0.34-py3-none-any.whl name: nrel-elm version: 0.0.34 diff --git a/tests/python/unit/extraction/test_extraction_validation.py b/tests/python/unit/extraction/test_extraction_validation.py index d482d7012..7d7d0e9da 100644 --- a/tests/python/unit/extraction/test_extraction_validation.py +++ b/tests/python/unit/extraction/test_extraction_validation.py @@ -6,6 +6,9 @@ from compass.extraction.wind.ordinance import WindHeuristic from compass.extraction.solar.ordinance import SolarHeuristic +from compass.extraction.geothermal_electricity.ordinance import ( + GeothermalElectricityHeuristic, +) @pytest.mark.parametrize( @@ -50,5 +53,43 @@ def test_possibly_mentions_solar(text, truth): assert SolarHeuristic().check(text) == truth +@pytest.mark.parametrize( + "text,truth", + [ + ("Geothermal power plant SETBACKS", True), + ("geothermal electricity generation SETBACKS", True), + ("geothermal facility setbacks", True), + ("geothermal turbine requirements", True), + ("geothermal generator spacing", True), + ("geothermal wellfield setbacks", True), + ("geothermal production well setbacks", True), + ("geothermal power production", True), + ("geothermal electricity generation", True), + ("geothermal generating facilities", True), + ("geothermal overlay zone", True), + ("geothermal heat pump SETBACKS", False), + ("ground source heat pump requirements", False), + ("GSHP setbacks", False), + ("residential geothermal systems", False), + ("geothermal HVAC requirements", False), + ("ground-coupled heat pump", False), + ("geoexchange system regulations", False), + 
("closed loop geothermal", False), + ("space heating geothermal", False), + ("district heating geothermal", False), + ("greenhouse heating geothermal", False), + ("accessory geothermal use", False), + ("geothermal direct use", False), + ("geothermal setback turbine", True), + ("geothermal\npower plant", True), + ("geothermal wellfield generator", True), + ], +) +def test_possibly_mentions_geothermal_electricity(text, truth): + """Test for `GeothermalElectricityHeuristic` class (basic execution)""" + + assert GeothermalElectricityHeuristic().check(text) == truth + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"])