diff --git a/energyml-utils/.flake8 b/energyml-utils/.flake8 index 07de32c..4830dae 100644 --- a/energyml-utils/.flake8 +++ b/energyml-utils/.flake8 @@ -1,6 +1,6 @@ [flake8] # Ignore specific error codes (comma-separated list) -ignore = E501, E722, W503, F403, E203, E202 +ignore = E501, E722, W503, F403, E203, E202, E402 # Max line length (default is 79, can be changed) max-line-length = 120 diff --git a/energyml-utils/.gitignore b/energyml-utils/.gitignore index 38a850f..016795e 100644 --- a/energyml-utils/.gitignore +++ b/energyml-utils/.gitignore @@ -44,6 +44,7 @@ sample/ gen*/ manip* *.epc +*.h5 *.off *.obj *.log @@ -54,6 +55,13 @@ manip* *.xml *.json +docs/*.md + +# DATA +*.obj +*.geojson +*.vtk +*.stl # WIP diff --git a/energyml-utils/example/epc_stream_keep_open_example.py b/energyml-utils/example/epc_stream_keep_open_example.py new file mode 100644 index 0000000..ea9d9cc --- /dev/null +++ b/energyml-utils/example/epc_stream_keep_open_example.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# Copyright (c) 2023-2024 Geosiris. +# SPDX-License-Identifier: Apache-2.0 +""" +Example demonstrating the keep_open feature of EpcStreamReader. + +This example shows how using keep_open=True improves performance when +performing multiple operations on an EPC file by keeping the ZIP file +open instead of reopening it for each operation. +""" + +import time +import sys +from pathlib import Path + +# Add src directory to path +src_path = Path(__file__).parent.parent / "src" +sys.path.insert(0, str(src_path)) + +from energyml.utils.epc_stream import EpcStreamReader + + +def benchmark_without_keep_open(epc_path: str, num_operations: int = 10): + """Benchmark reading objects without keep_open.""" + print(f"\nBenchmark WITHOUT keep_open ({num_operations} operations):") + print("=" * 60) + + start = time.time() + + # Create reader without keep_open + with EpcStreamReader(epc_path, keep_open=False, cache_size=5) as reader: + metadata_list = reader.list_object_metadata() + + if not metadata_list: + print(" No objects in EPC file") + return 0 + + # Perform multiple read operations + for i in range(min(num_operations, len(metadata_list))): + meta = metadata_list[i % len(metadata_list)] + if meta.identifier: + _ = reader.get_object_by_identifier(meta.identifier) + if i == 0: + print(f" First object: {meta.object_type}") + + elapsed = time.time() - start + print(f" Time: {elapsed:.4f}s") + print(f" Avg per operation: {elapsed / num_operations:.4f}s") + + return elapsed + + +def benchmark_with_keep_open(epc_path: str, num_operations: int = 10): + """Benchmark reading objects with keep_open.""" + print(f"\nBenchmark WITH keep_open ({num_operations} operations):") + print("=" * 60) + + start = time.time() + + # Create reader with keep_open + with EpcStreamReader(epc_path, keep_open=True, cache_size=5) as reader: + metadata_list = reader.list_object_metadata() + + if not metadata_list: + print(" No objects in EPC file") + return 0 + + # Perform multiple read operations + for i in range(min(num_operations, len(metadata_list))): + meta = metadata_list[i % len(metadata_list)] + if meta.identifier: + _ = reader.get_object_by_identifier(meta.identifier) + if i == 0: + print(f" First object: {meta.object_type}") + + elapsed = time.time() - start + print(f" Time: {elapsed:.4f}s") + print(f" Avg per operation: {elapsed / num_operations:.4f}s") + + return elapsed + + +def demonstrate_file_modification_with_keep_open(epc_path: str): + """Demonstrate that modifications work correctly with keep_open.""" + print("\nDemonstrating 
file modifications with keep_open:") + print("=" * 60) + + with EpcStreamReader(epc_path, keep_open=True) as reader: + metadata_list = reader.list_object_metadata() + original_count = len(metadata_list) + print(f" Original object count: {original_count}") + + if metadata_list: + # Get first object + first_obj = reader.get_object_by_identifier(metadata_list[0].identifier) + print(f" Retrieved object: {metadata_list[0].object_type}") + + # Update the object (re-add it) + identifier = reader.update_object(first_obj) + print(f" Updated object: {identifier}") + + # Verify we can still read it after update + updated_obj = reader.get_object_by_identifier(identifier) + assert updated_obj is not None, "Failed to read object after update" + print(" ✓ Object successfully read after update") + + # Verify object count is the same + new_metadata_list = reader.list_object_metadata() + new_count = len(new_metadata_list) + print(f" New object count: {new_count}") + + if new_count == original_count: + print(" ✓ Object count unchanged (correct)") + else: + print(f" ✗ Object count changed: {original_count} -> {new_count}") + + +def demonstrate_proper_cleanup(): + """Demonstrate that persistent ZIP file is properly closed.""" + print("\nDemonstrating proper cleanup:") + print("=" * 60) + + temp_path = "temp_test.epc" + + try: + # Create a temporary EPC file + reader = EpcStreamReader(temp_path, keep_open=True) + print(" Created EpcStreamReader with keep_open=True") + + # Manually close + reader.close() + print(" ✓ Manually closed reader") + + # Create another reader and let it go out of scope + reader2 = EpcStreamReader(temp_path, keep_open=True) + print(" Created second EpcStreamReader") + del reader2 + print(" ✓ Reader deleted (automatic cleanup via __del__)") + + # Create reader in context manager + with EpcStreamReader(temp_path, keep_open=True) as _: + print(" Created third EpcStreamReader in context manager") + print(" ✓ Context manager exited (automatic cleanup)") + + finally: + # Clean up temp file + if Path(temp_path).exists(): + Path(temp_path).unlink() + + +def main(): + """Run all examples.""" + print("EpcStreamReader keep_open Feature Demonstration") + print("=" * 60) + + # You'll need to provide a valid EPC file path + epc_path = "wip/epc_test.epc" + + if not Path(epc_path).exists(): + print(f"\nError: EPC file not found: {epc_path}") + print("Please provide a valid EPC file path in the script.") + print("\nRunning cleanup demonstration only:") + demonstrate_proper_cleanup() + return + + try: + # Run benchmarks + num_ops = 20 + + time_without = benchmark_without_keep_open(epc_path, num_ops) + time_with = benchmark_with_keep_open(epc_path, num_ops) + + # Show comparison + print("\n" + "=" * 60) + print("Performance Comparison:") + print("=" * 60) + if time_with > 0 and time_without > 0: + speedup = time_without / time_with + improvement = ((time_without - time_with) / time_without) * 100 + print(f" Speedup: {speedup:.2f}x") + print(f" Improvement: {improvement:.1f}%") + + if speedup > 1.1: + print("\n ✓ keep_open=True significantly improves performance!") + elif speedup > 1.0: + print("\n ✓ keep_open=True slightly improves performance") + else: + print("\n Note: For this workload, the difference is minimal") + print(" (cache effects or small file)") + + # Demonstrate modifications + demonstrate_file_modification_with_keep_open(epc_path) + + # Demonstrate cleanup + demonstrate_proper_cleanup() + + print("\n" + "=" * 60) + print("All demonstrations completed successfully!") + print("=" * 60) + + 
except Exception as e: + print(f"\nError: {e}") + import traceback + + traceback.print_exc() + + +if __name__ == "__main__": + main() diff --git a/energyml-utils/example/main.py b/energyml-utils/example/main.py index 6301e7c..4313ed5 100644 --- a/energyml-utils/example/main.py +++ b/energyml-utils/example/main.py @@ -1,14 +1,27 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 import sys +import logging from pathlib import Path import re from dataclasses import fields +from energyml.utils.constants import ( + RGX_CONTENT_TYPE, + EpcExportVersion, + date_to_epoch, + epoch, + epoch_to_date, + gen_uuid, + get_domain_version_from_content_or_qualified_type, + parse_content_or_qualified_type, + parse_content_type, +) + src_path = Path(__file__).parent.parent / "src" sys.path.insert(0, str(src_path)) -from energyml.eml.v2_3.commonv2 import * +from energyml.eml.v2_3.commonv2 import Citation, DataObjectReference, ExistenceKind, Activity from energyml.eml.v2_3.commonv2 import AbstractObject from energyml.resqml.v2_0_1.resqmlv2 import DoubleHdf5Array from energyml.resqml.v2_0_1.resqmlv2 import TriangulatedSetRepresentation as Tr20 @@ -22,17 +35,70 @@ # from src.energyml.utils.data.hdf import * from energyml.utils.data.helper import get_projected_uom, is_z_reversed -from energyml.utils.epc import * -from energyml.utils.introspection import * -from energyml.utils.manager import * -from energyml.utils.serialization import * +from energyml.utils.epc import ( + Epc, + EPCRelsRelationshipType, + as_dor, + create_energyml_object, + create_external_part_reference, + gen_energyml_object_path, + get_reverse_dor_list, +) +from energyml.utils.introspection import ( + class_match_rgx, + copy_attributes, + get_class_attributes, + get_class_fields, + get_class_from_content_type, + get_class_from_name, + get_class_from_qualified_type, + get_class_methods, + get_content_type_from_class, + get_obj_pkg_pkgv_type_uuid_version, + get_obj_uri, + get_object_attribute, + get_obj_uuid, + get_object_attribute_rgx, + get_qualified_type_from_class, + is_abstract, + is_primitive, + random_value_from_class, + search_attribute_matching_name, + search_attribute_matching_name_with_path, + search_attribute_matching_type, + search_attribute_matching_type_with_path, +) +from energyml.utils.manager import ( + # create_energyml_object, + # create_external_part_reference, + dict_energyml_modules, + get_class_pkg, + get_class_pkg_version, + get_classes_matching_name, + get_sub_classes, + list_energyml_modules, +) +from energyml.utils.serialization import ( + read_energyml_xml_file, + read_energyml_xml_str, + serialize_json, + JSON_VERSION, + serialize_xml, +) from energyml.utils.validation import ( patterns_validation, dor_validation, validate_epc, correct_dor, ) -from energyml.utils.xml import * +from energyml.utils.xml import ( + find_schema_version_in_element, + get_class_name_from_xml, + get_root_namespace, + get_root_type, + get_tree, + get_xml_encoding, +) from energyml.utils.data.datasets_io import HDF5FileReader, get_path_in_external_with_path fi_cit = Citation( diff --git a/energyml-utils/example/main_data.py b/energyml-utils/example/main_data.py index a05cd20..52ff8ee 100644 --- a/energyml-utils/example/main_data.py +++ b/energyml-utils/example/main_data.py @@ -1,6 +1,7 @@ # Copyright (c) 2023-2024 Geosiris. 
# SPDX-License-Identifier: Apache-2.0 - +import logging +from io import BytesIO from energyml.eml.v2_3.commonv2 import ( JaggedArray, AbstractValueArray, @@ -8,16 +9,27 @@ StringXmlArray, IntegerXmlArray, ) +from energyml.utils.data.export import export_obj from src.energyml.utils.data.helper import ( get_array_reader_function, + read_array, +) +from src.energyml.utils.data.mesh import ( + GeoJsonGeometryType, + MeshFileFormat, + _create_shape, + _write_geojson_shape, + export_multiple_data, + export_off, + read_mesh_object, ) -from src.energyml.utils.data.mesh import * -from src.energyml.utils.data.mesh import _create_shape, _write_geojson_shape from src.energyml.utils.epc import gen_energyml_object_path from src.energyml.utils.introspection import ( + get_object_attribute, is_abstract, get_obj_uuid, + search_attribute_matching_name_with_path, ) from src.energyml.utils.manager import get_sub_classes from src.energyml.utils.serialization import ( @@ -28,11 +40,17 @@ ) from src.energyml.utils.validation import validate_epc from src.energyml.utils.xml import get_tree -from utils.data.datasets_io import ( +from src.energyml.utils.data.datasets_io import ( HDF5FileReader, get_path_in_external_with_path, get_external_file_path_from_external_path, ) +from energyml.utils.epc import Epc +from src.energyml.utils.data.mesh import ( + read_polyline_representation, + read_point_representation, + read_grid2d_representation, +) logger = logging.getLogger(__name__) @@ -607,7 +625,7 @@ def test_simple_geojson(): ), ) - print(f"\n+++++++++++++++++++++++++\n") + print("\n+++++++++++++++++++++++++\n") def test_simple_geojson_io(): diff --git a/energyml-utils/example/main_datasets.py b/energyml-utils/example/main_datasets.py index edc1278..234ed43 100644 --- a/energyml-utils/example/main_datasets.py +++ b/energyml-utils/example/main_datasets.py @@ -1,15 +1,15 @@ # Copyright (c) 2023-2024 Geosiris. # SPDX-License-Identifier: Apache-2.0 -from src.energyml.utils.data.datasets_io import ( +from energyml.utils.data.datasets_io import ( ParquetFileReader, ParquetFileWriter, CSVFileReader, CSVFileWriter, read_dataset, ) -from utils.data.helper import read_array -from utils.introspection import search_attribute_matching_name_with_path -from utils.serialization import read_energyml_xml_file +from energyml.utils.data.helper import read_array +from energyml.utils.introspection import search_attribute_matching_name_with_path +from energyml.utils.serialization import read_energyml_xml_file def local_parquet(): diff --git a/energyml-utils/example/main_hdf.py b/energyml-utils/example/main_hdf.py deleted file mode 100644 index ac23ed4..0000000 --- a/energyml-utils/example/main_hdf.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2023-2024 Geosiris. 
-# SPDX-License-Identifier: Apache-2.0 -import sys -from pathlib import Path - -# Add src directory to path -src_path = Path(__file__).parent.parent / "src" -sys.path.insert(0, str(src_path)) - -from energyml.utils.data.datasets_io import get_path_in_external_with_path -from energyml.utils.introspection import get_obj_uri - - -if __name__ == "__main__": - from energyml.utils.epc import Epc - - # Create an EPC file - epc = Epc.read_file("wip/BRGM_AVRE_all_march_25.epc") - - print("\n".join(map(lambda o: str(get_obj_uri(o)), epc.energyml_objects))) - - print(epc.get_h5_file_paths("eml:///resqml22.PolylineSetRepresentation(e75db94d-a251-4f31-8a24-23b9573fbf39)")) - - print( - get_path_in_external_with_path( - epc.get_object_by_identifier( - "eml:///resqml22.PolylineSetRepresentation(e75db94d-a251-4f31-8a24-23b9573fbf39)" - ) - ) - ) - - print( - epc.read_h5_dataset( - "eml:///resqml22.PolylineSetRepresentation(e75db94d-a251-4f31-8a24-23b9573fbf39)", - "/RESQML/e75db94d-a251-4f31-8a24-23b9573fbf39/points_patch0", - ) - ) diff --git a/energyml-utils/example/main_stream.py b/energyml-utils/example/main_stream.py index b1a712a..87f529a 100644 --- a/energyml-utils/example/main_stream.py +++ b/energyml-utils/example/main_stream.py @@ -24,12 +24,13 @@ from energyml.utils.serialization import serialize_json +from energyml.resqml.v2_2.resqmlv2 import TriangulatedSetRepresentation, ContactElement +from energyml.eml.v2_3.commonv2 import DataObjectReference + + def test_epc_stream_main(): logging.basicConfig(level=logging.DEBUG) - from energyml.resqml.v2_2.resqmlv2 import TriangulatedSetRepresentation, ContactElement - from energyml.eml.v2_3.commonv2 import DataObjectReference - # Use the test EPC file test_epc = "wip/my_stream_file.epc" @@ -115,9 +116,6 @@ def test_epc_stream_main(): def test_epc_im_main(): logging.basicConfig(level=logging.DEBUG) - from energyml.resqml.v2_2.resqmlv2 import TriangulatedSetRepresentation, ContactElement - from energyml.eml.v2_3.commonv2 import DataObjectReference - # Use the test EPC file test_epc = "wip/my_stream_file.epc" diff --git a/energyml-utils/example/main_test_3D.py b/energyml-utils/example/main_test_3D.py new file mode 100644 index 0000000..0657bdf --- /dev/null +++ b/energyml-utils/example/main_test_3D.py @@ -0,0 +1,145 @@ +# Copyright (c) 2023-2024 Geosiris. 
+# SPDX-License-Identifier: Apache-2.0 +import os +import re +import datetime +from pathlib import Path +import traceback +from typing import Optional + +from energyml.utils.data.export import export_obj, export_stl, export_vtk +from energyml.utils.data.mesh import read_mesh_object +from energyml.utils.epc_stream import EpcStreamReader +from energyml.utils.epc import Epc + +from energyml.utils.exception import NotSupportedError + + +def export_all_representation(epc_path: str, output_dir: str, regex_type_filter: Optional[str] = None): + + storage = EpcStreamReader(epc_path, keep_open=True) + + dt = datetime.datetime.now().strftime("%Hh%M_%d-%m-%Y") + not_supported_types = set() + for mdata in storage.list_objects(): + if "Representation" in mdata.object_type and ( + regex_type_filter is None + or len(regex_type_filter) == 0 + or re.search(regex_type_filter, mdata.object_type, flags=re.IGNORECASE) + ): + logging.info(f"Exporting representation: {mdata.object_type} ({mdata.uuid})") + energyml_obj = storage.get_object_by_uuid(mdata.uuid)[0] + try: + mesh_list = read_mesh_object( + energyml_object=energyml_obj, + workspace=storage, + use_crs_displacement=True, + ) + + os.makedirs(output_dir, exist_ok=True) + + path = Path(output_dir) / f"{dt}-{mdata.object_type}{mdata.uuid}_mesh.obj" + with path.open("wb") as f: + export_obj( + mesh_list=mesh_list, + out=f, + ) + export_stl_path = path.with_suffix(".stl") + with export_stl_path.open("wb") as stl_f: + export_stl( + mesh_list=mesh_list, + out=stl_f, + ) + export_vtk_path = path.with_suffix(".vtk") + with export_vtk_path.open("wb") as vtk_f: + export_vtk( + mesh_list=mesh_list, + out=vtk_f, + ) + + logging.info(f" ✓ Exported to {path.name}") + except NotSupportedError: + # print(f" ✗ Not supported: {e}") + not_supported_types.add(mdata.object_type) + except Exception: + traceback.print_exc() + + logging.info("Export completed.") + if not_supported_types: + logging.info("Not supported representation types encountered:") + for t in not_supported_types: + logging.info(f" - {t}") + + +def export_all_representation_in_memory(epc_path: str, output_dir: str, regex_type_filter: Optional[str] = None): + + storage = Epc.read_file(epc_path) + if storage is None: + logging.error(f"Failed to read EPC file: {epc_path}") + return + + dt = datetime.datetime.now().strftime("%Hh%M_%d-%m-%Y") + not_supported_types = set() + for mdata in storage.list_objects(): + if "Representation" in mdata.object_type and ( + regex_type_filter is None + or len(regex_type_filter) == 0 + or re.search(regex_type_filter, mdata.object_type, flags=re.IGNORECASE) + ): + logging.info(f"Exporting representation: {mdata.object_type} ({mdata.uuid})") + energyml_obj = storage.get_object_by_uuid(mdata.uuid)[0] + try: + mesh_list = read_mesh_object( + energyml_object=energyml_obj, + workspace=storage, + use_crs_displacement=True, + ) + + os.makedirs(output_dir, exist_ok=True) + + path = Path(output_dir) / f"{dt}-{mdata.object_type}{mdata.uuid}_mesh.obj" + with path.open("wb") as f: + export_obj( + mesh_list=mesh_list, + out=f, + ) + export_stl_path = path.with_suffix(".stl") + with export_stl_path.open("wb") as stl_f: + export_stl( + mesh_list=mesh_list, + out=stl_f, + ) + export_vtk_path = path.with_suffix(".vtk") + with export_vtk_path.open("wb") as vtk_f: + export_vtk( + mesh_list=mesh_list, + out=vtk_f, + ) + + logging.info(f" ✓ Exported to {path.name}") + except NotSupportedError: + # print(f" ✗ Not supported: {e}") + not_supported_types.add(mdata.object_type) + except Exception: + 
traceback.print_exc() + + logging.info("Export completed.") + if not_supported_types: + logging.info("Not supported representation types encountered:") + for t in not_supported_types: + logging.info(f" - {t}") + + +# $env:PYTHONPATH="$(pwd)\src"; poetry run python example/main_test_3D.py +if __name__ == "__main__": + import logging + + logging.basicConfig(level=logging.DEBUG) + # epc_file = "rc/epc/testingPackageCpp.epc" + epc_file = "rc/epc/output-val.epc" + # epc_file = "rc/epc/Volve_Horizons_and_Faults_Depth_originEQN.epc" + output_directory = Path("exported_meshes") / Path(epc_file).name.replace(".epc", "_3D_export") + # export_all_representation(epc_file, output_directory) + # export_all_representation(epc_file, output_directory, regex_type_filter="Wellbore") + # export_all_representation(epc_file, str(output_directory), regex_type_filter="") + export_all_representation_in_memory(epc_file, str(output_directory), regex_type_filter="") diff --git a/energyml-utils/example/tools.py b/energyml-utils/example/tools.py index 3c889ba..20dfe69 100644 --- a/energyml-utils/example/tools.py +++ b/energyml-utils/example/tools.py @@ -291,7 +291,7 @@ def generate_data(): "-ff", type=str, default="json", - help=f"Type of the output files (one of : ['json', 'xml']). Default is 'json'", + help="Type of the output files (one of : ['json', 'xml']). Default is 'json'", ) args = parser.parse_args() @@ -413,7 +413,7 @@ def xml_to_json(): def json_to_xml(): parser = argparse.ArgumentParser() parser.add_argument("--file", "-f", type=str, help="Input File") - parser.add_argument("--out", "-o", type=str, default=None, help=f"Output file") + parser.add_argument("--out", "-o", type=str, default=None, help="Output file") args = parser.parse_args() @@ -436,7 +436,7 @@ def json_to_xml(): def json_to_epc(): parser = argparse.ArgumentParser() parser.add_argument("--file", "-f", type=str, help="Input File") - parser.add_argument("--out", "-o", type=str, default=None, help=f"Output EPC file") + parser.add_argument("--out", "-o", type=str, default=None, help="Output EPC file") args = parser.parse_args() diff --git a/energyml-utils/src/energyml/utils/data/datasets_io.py b/energyml-utils/src/energyml/utils/data/datasets_io.py index 3325eeb..d899015 100644 --- a/energyml-utils/src/energyml/utils/data/datasets_io.py +++ b/energyml-utils/src/energyml/utils/data/datasets_io.py @@ -54,61 +54,98 @@ # HDF5 if __H5PY_MODULE_EXISTS__: - def h5_list_datasets(h5_file_path: Union[BytesIO, str]) -> List[str]: + def h5_list_datasets(h5_file_path: Union[BytesIO, str, "h5py.File"]) -> List[str]: """ List all datasets in an HDF5 file. 
- :param h5_file_path: Path to the HDF5 file + :param h5_file_path: Path to the HDF5 file, BytesIO object, or an already opened h5py.File :return: List of dataset names in the HDF5 file """ res = [] - with h5py.File(h5_file_path, "r") as f: # type: ignore - # Function to print the names of all datasets + + # Check if it's already an opened h5py.File + if isinstance(h5_file_path, h5py.File): # type: ignore + def list_datasets(name, obj): - if isinstance(obj, h5py.Dataset): # Check if the object is a dataset # type: ignore + if isinstance(obj, h5py.Dataset): # type: ignore res.append(name) - # Visit all items in the HDF5 file and apply the list function - f.visititems(list_datasets) + h5_file_path.visititems(list_datasets) + else: + with h5py.File(h5_file_path, "r") as f: # type: ignore + # Function to print the names of all datasets + def list_datasets(name, obj): + if isinstance(obj, h5py.Dataset): # Check if the object is a dataset # type: ignore + res.append(name) + + # Visit all items in the HDF5 file and apply the list function + f.visititems(list_datasets) return res @dataclass class HDF5FileReader(DatasetReader): # noqa: F401 - def read_array(self, source: Union[BytesIO, str], path_in_external_file: str) -> Optional[np.ndarray]: - with h5py.File(source, "r") as f: # type: ignore - d_group = f[path_in_external_file] + def read_array( + self, source: Union[BytesIO, str, "h5py.File"], path_in_external_file: str + ) -> Optional[np.ndarray]: + # Check if it's already an opened h5py.File + if isinstance(source, h5py.File): # type: ignore + d_group = source[path_in_external_file] return d_group[()] # type: ignore - - def get_array_dimension(self, source: Union[BytesIO, str], path_in_external_file: str) -> Optional[List[int]]: - with h5py.File(source, "r") as f: # type: ignore - return list(f[path_in_external_file].shape) + else: + with h5py.File(source, "r") as f: # type: ignore + d_group = f[path_in_external_file] + return d_group[()] # type: ignore + + def get_array_dimension( + self, source: Union[BytesIO, str, "h5py.File"], path_in_external_file: str + ) -> Optional[List[int]]: + # Check if it's already an opened h5py.File + if isinstance(source, h5py.File): # type: ignore + return list(source[path_in_external_file].shape) + else: + with h5py.File(source, "r") as f: # type: ignore + return list(f[path_in_external_file].shape) def extract_h5_datasets( self, - input_h5: Union[BytesIO, str], - output_h5: Union[BytesIO, str], + input_h5: Union[BytesIO, str, "h5py.File"], + output_h5: Union[BytesIO, str, "h5py.File"], h5_datasets_paths: List[str], ) -> None: """ Copy all dataset from :param input_h5 matching with paths in :param h5_datasets_paths into the :param output - :param input_h5: - :param output_h5: + :param input_h5: Path to HDF5 file, BytesIO, or already opened h5py.File + :param output_h5: Path to HDF5 file, BytesIO, or already opened h5py.File :param h5_datasets_paths: :return: """ if h5_datasets_paths is None: h5_datasets_paths = h5_list_datasets(input_h5) if len(h5_datasets_paths) > 0: - with h5py.File(output_h5, "a") as f_dest: # type: ignore - with h5py.File(input_h5, "r") as f_src: # type: ignore + # Handle output file + should_close_dest = not isinstance(output_h5, h5py.File) # type: ignore + f_dest = output_h5 if isinstance(output_h5, h5py.File) else h5py.File(output_h5, "a") # type: ignore + + try: + # Handle input file + should_close_src = not isinstance(input_h5, h5py.File) # type: ignore + f_src = input_h5 if isinstance(input_h5, h5py.File) else h5py.File(input_h5, 
"r") # type: ignore + + try: for dataset in h5_datasets_paths: f_dest.create_dataset(dataset, data=f_src[dataset]) + finally: + if should_close_src: + f_src.close() + finally: + if should_close_dest: + f_dest.close() @dataclass class HDF5FileWriter: def write_array( self, - target: Union[str, BytesIO, bytes], + target: Union[str, BytesIO, bytes, "h5py.File"], array: Union[list, np.ndarray], path_in_external_file: str, dtype: Optional[np.dtype] = None, @@ -119,32 +156,53 @@ def write_array( if dtype is not None and not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) - with h5py.File(target, "a") as f: # type: ignore - # print(array.dtype, h5py.string_dtype(), array.dtype == 'O') - # print("\t", dtype or (h5py.string_dtype() if array.dtype == '0' else array.dtype)) + # Check if it's already an opened h5py.File + if isinstance(target, h5py.File): # type: ignore if isinstance(array, np.ndarray) and array.dtype == "O": array = np.asarray([s.encode() if isinstance(s, str) else s for s in array]) np.void(array) - dset = f.create_dataset(path_in_external_file, array.shape, dtype or array.dtype) + dset = target.create_dataset(path_in_external_file, array.shape, dtype or array.dtype) dset[()] = array + else: + with h5py.File(target, "a") as f: # type: ignore + # print(array.dtype, h5py.string_dtype(), array.dtype == 'O') + # print("\t", dtype or (h5py.string_dtype() if array.dtype == '0' else array.dtype)) + if isinstance(array, np.ndarray) and array.dtype == "O": + array = np.asarray([s.encode() if isinstance(s, str) else s for s in array]) + np.void(array) + dset = f.create_dataset(path_in_external_file, array.shape, dtype or array.dtype) + dset[()] = array else: class HDF5FileReader: - def read_array(self, source: Union[BytesIO, str], path_in_external_file: str) -> Optional[np.ndarray]: + def read_array(self, source: Union[BytesIO, str, Any], path_in_external_file: str) -> Optional[np.ndarray]: raise MissingExtraInstallation(extra_name="hdf5") - def get_array_dimension(self, source: Union[BytesIO, str], path_in_external_file: str) -> Optional[np.ndarray]: + def get_array_dimension( + self, source: Union[BytesIO, str, Any], path_in_external_file: str + ) -> Optional[np.ndarray]: raise MissingExtraInstallation(extra_name="hdf5") def extract_h5_datasets( self, - input_h5: Union[BytesIO, str], - output_h5: Union[BytesIO, str], + input_h5: Union[BytesIO, str, Any], + output_h5: Union[BytesIO, str, Any], h5_datasets_paths: List[str], ) -> None: raise MissingExtraInstallation(extra_name="hdf5") + class HDF5FileWriter: + + def write_array( + self, + target: Union[str, BytesIO, bytes, Any], + array: Union[list, np.ndarray], + path_in_external_file: str, + dtype: Optional[np.dtype] = None, + ): + raise MissingExtraInstallation(extra_name="hdf5") + # APACHE PARQUET if __PARQUET_MODULE_EXISTS__: diff --git a/energyml-utils/src/energyml/utils/data/export.py b/energyml-utils/src/energyml/utils/data/export.py new file mode 100644 index 0000000..48d9681 --- /dev/null +++ b/energyml-utils/src/energyml/utils/data/export.py @@ -0,0 +1,489 @@ +# Copyright (c) 2023-2024 Geosiris. +# SPDX-License-Identifier: Apache-2.0 +""" +Module for exporting mesh data to various file formats. +Supports OBJ, GeoJSON, VTK, and STL formats. 
+""" + +import json +import struct +from enum import Enum +from pathlib import Path +from typing import TYPE_CHECKING, BinaryIO, List, Optional, TextIO, Union + +import numpy as np + +if TYPE_CHECKING: + from .mesh import AbstractMesh + + +class ExportFormat(Enum): + """Supported mesh export formats.""" + + OBJ = "obj" + GEOJSON = "geojson" + VTK = "vtk" + STL = "stl" + + @classmethod + def from_extension(cls, extension: str) -> "ExportFormat": + """Get format from file extension.""" + ext = extension.lower().lstrip(".") + for fmt in cls: + if fmt.value == ext: + return fmt + raise ValueError(f"Unsupported file extension: {extension}") + + @classmethod + def all_extensions(cls) -> List[str]: + """Get all supported file extensions.""" + return [fmt.value for fmt in cls] + + +class ExportOptions: + """Base class for export options.""" + + pass + + +class STLExportOptions(ExportOptions): + """Options for STL export.""" + + def __init__(self, binary: bool = True, ascii_precision: int = 6): + """ + Initialize STL export options. + + :param binary: If True, export as binary STL; if False, export as ASCII STL + :param ascii_precision: Number of decimal places for ASCII format + """ + self.binary = binary + self.ascii_precision = ascii_precision + + +class VTKExportOptions(ExportOptions): + """Options for VTK export.""" + + def __init__(self, binary: bool = False, dataset_name: str = "mesh"): + """ + Initialize VTK export options. + + :param binary: If True, export as binary VTK; if False, export as ASCII VTK + :param dataset_name: Name of the dataset in VTK file + """ + self.binary = binary + self.dataset_name = dataset_name + + +class GeoJSONExportOptions(ExportOptions): + """Options for GeoJSON export.""" + + def __init__(self, indent: Optional[int] = 2, properties: Optional[dict] = None): + """ + Initialize GeoJSON export options. + + :param indent: JSON indentation level (None for compact) + :param properties: Additional properties to include in features + """ + self.indent = indent + self.properties = properties or {} + + +def export_obj(mesh_list: List["AbstractMesh"], out: BinaryIO, obj_name: Optional[str] = None) -> None: + """ + Export mesh data to Wavefront OBJ format. 
+ + :param mesh_list: List of AbstractMesh objects to export + :param out: Binary output stream + :param obj_name: Optional object name for the OBJ file + """ + # Lazy import to avoid circular dependency + from .mesh import PolylineSetMesh + + # Write header + out.write(b"# Generated by energyml-utils a Geosiris python module\n\n") + + # Write object name if provided + if obj_name is not None: + out.write(f"o {obj_name}\n\n".encode("utf-8")) + + point_offset = 0 + + for mesh in mesh_list: + # Write group name using mesh identifier or uuid + mesh_id = getattr(mesh, "identifier", None) or getattr(mesh, "uuid", "mesh") + out.write(f"g {mesh_id}\n\n".encode("utf-8")) + + # Write vertices + for point in mesh.point_list: + if len(point) > 0: + out.write(f"v {' '.join(map(str, point))}\n".encode("utf-8")) + + # Write faces or lines depending on mesh type + indices = mesh.get_indices() + elt_letter = "l" if isinstance(mesh, PolylineSetMesh) else "f" + + for face_or_line in indices: + if len(face_or_line) > 1: + # OBJ indices are 1-based + indices_str = " ".join(str(idx + point_offset + 1) for idx in face_or_line) + out.write(f"{elt_letter} {indices_str}\n".encode("utf-8")) + + point_offset += len(mesh.point_list) + + +def export_geojson( + mesh_list: List["AbstractMesh"], out: TextIO, options: Optional[GeoJSONExportOptions] = None +) -> None: + """ + Export mesh data to GeoJSON format. + + :param mesh_list: List of AbstractMesh objects to export + :param out: Text output stream + :param options: GeoJSON export options + """ + # Lazy import to avoid circular dependency + from .mesh import PolylineSetMesh, SurfaceMesh + + if options is None: + options = GeoJSONExportOptions() + + features = [] + + for mesh_idx, mesh in enumerate(mesh_list): + indices = mesh.get_indices() + + if isinstance(mesh, PolylineSetMesh): + # Export as LineString features + for line_idx, line_indices in enumerate(indices): + if len(line_indices) < 2: + continue + coordinates = [list(mesh.point_list[idx]) for idx in line_indices] + feature = { + "type": "Feature", + "geometry": {"type": "LineString", "coordinates": coordinates}, + "properties": {"mesh_index": mesh_idx, "line_index": line_idx, **options.properties}, + } + features.append(feature) + + elif isinstance(mesh, SurfaceMesh): + # Export as Polygon features + for face_idx, face_indices in enumerate(indices): + if len(face_indices) < 3: + continue + # GeoJSON Polygon requires closed ring (first point == last point) + coordinates = [list(mesh.point_list[idx]) for idx in face_indices] + coordinates.append(coordinates[0]) # Close the ring + + feature = { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [coordinates]}, + "properties": {"mesh_index": mesh_idx, "face_index": face_idx, **options.properties}, + } + features.append(feature) + + geojson = {"type": "FeatureCollection", "features": features} + + json.dump(geojson, out, indent=options.indent) + + +def export_vtk(mesh_list: List["AbstractMesh"], out: BinaryIO, options: Optional[VTKExportOptions] = None) -> None: + """ + Export mesh data to VTK legacy format. 
+ + :param mesh_list: List of AbstractMesh objects to export + :param out: Binary output stream + :param options: VTK export options + """ + # Lazy import to avoid circular dependency + from .mesh import PolylineSetMesh, SurfaceMesh + + if options is None: + options = VTKExportOptions() + + # Combine all meshes + all_points = [] + all_polygons = [] + all_lines = [] + vertex_offset = 0 + + for mesh in mesh_list: + all_points.extend(mesh.point_list) + indices = mesh.get_indices() + + if isinstance(mesh, SurfaceMesh): + # Adjust face indices + for face in indices: + adjusted_face = [idx + vertex_offset for idx in face] + all_polygons.append(adjusted_face) + elif isinstance(mesh, PolylineSetMesh): + # Adjust line indices + for line in indices: + adjusted_line = [idx + vertex_offset for idx in line] + all_lines.append(adjusted_line) + + vertex_offset += len(mesh.point_list) + + # Write VTK header + out.write(b"# vtk DataFile Version 3.0\n") + out.write(f"{options.dataset_name}\n".encode("utf-8")) + out.write(b"ASCII\n") + out.write(b"DATASET POLYDATA\n") + + # Write points + out.write(f"POINTS {len(all_points)} float\n".encode("utf-8")) + for point in all_points: + out.write(f"{point[0]} {point[1]} {point[2]}\n".encode("utf-8")) + + # Write polygons + if all_polygons: + total_poly_size = sum(len(poly) + 1 for poly in all_polygons) + out.write(f"POLYGONS {len(all_polygons)} {total_poly_size}\n".encode("utf-8")) + for poly in all_polygons: + out.write(f"{len(poly)} {' '.join(str(idx) for idx in poly)}\n".encode("utf-8")) + + # Write lines + if all_lines: + total_line_size = sum(len(line) + 1 for line in all_lines) + out.write(f"LINES {len(all_lines)} {total_line_size}\n".encode("utf-8")) + for line in all_lines: + out.write(f"{len(line)} {' '.join(str(idx) for idx in line)}\n".encode("utf-8")) + + +def export_stl(mesh_list: List["AbstractMesh"], out: BinaryIO, options: Optional[STLExportOptions] = None) -> None: + """ + Export mesh data to STL format (binary or ASCII). + + Note: STL format only supports triangles. Only triangular faces will be exported. 
+ + :param mesh_list: List of AbstractMesh objects to export + :param out: Binary output stream + :param options: STL export options + """ + # Lazy import to avoid circular dependency + from .mesh import SurfaceMesh + + if options is None: + options = STLExportOptions(binary=True) + + # Collect all triangles (only from SurfaceMesh with triangular faces) + all_triangles = [] + for mesh in mesh_list: + if isinstance(mesh, SurfaceMesh): + indices = mesh.get_indices() + for face in indices: + # Only export triangular faces + if len(face) == 3: + p0 = np.array(mesh.point_list[face[0]]) + p1 = np.array(mesh.point_list[face[1]]) + p2 = np.array(mesh.point_list[face[2]]) + all_triangles.append((p0, p1, p2)) + + if options.binary: + _export_stl_binary(all_triangles, out) + else: + _export_stl_ascii(all_triangles, out, options.ascii_precision) + + +def _export_stl_binary(triangles: List[tuple], out: BinaryIO) -> None: + """Export STL in binary format.""" + # Write 80-byte header + header = b"Binary STL file generated by energyml-utils" + b"\0" * (80 - 44) + out.write(header) + + # Write number of triangles + out.write(struct.pack("<I", len(triangles))) + + for p0, p1, p2 in triangles: + # Calculate normal vector + v1 = p1 - p0 + v2 = p2 - p0 + normal = np.cross(v1, v2) + norm = np.linalg.norm(normal) + if norm > 0: + normal = normal / norm + else: + normal = np.array([0.0, 0.0, 0.0]) + + # Write normal + out.write(struct.pack("<fff", float(normal[0]), float(normal[1]), float(normal[2]))) + + # Write vertices + for point in [p0, p1, p2]: + out.write(struct.pack("<fff", float(point[0]), float(point[1]), float(point[2]))) + + # Write attribute byte count (unused) + out.write(struct.pack("<H", 0)) + + +def _export_stl_ascii(triangles: List[tuple], out: BinaryIO, precision: int = 6) -> None: + """Export STL in ASCII format.""" + out.write(b"solid mesh\n") + + for p0, p1, p2 in triangles: + # Calculate normal vector + v1 = p1 - p0 + v2 = p2 - p0 + normal = np.cross(v1, v2) + norm = np.linalg.norm(normal) + if norm > 0: + normal = normal / norm + else: + normal = np.array([0.0, 0.0, 0.0]) + + # Write facet + line = f" facet normal {normal[0]:.{precision}e} {normal[1]:.{precision}e} {normal[2]:.{precision}e}\n" + out.write(line.encode("utf-8")) + out.write(b" outer loop\n") + + for point in [p0, p1, p2]: + line = f" vertex {point[0]:.{precision}e} {point[1]:.{precision}e} {point[2]:.{precision}e}\n" + out.write(line.encode("utf-8")) + + out.write(b" endloop\n") + out.write(b" endfacet\n") + + out.write(b"endsolid mesh\n") + + +def export_mesh( + mesh_list: List["AbstractMesh"], + output_path: Union[str, Path], + format: Optional[ExportFormat] = None, + options: Optional[ExportOptions] = None, +) -> None: + """ + Export mesh data to a file in the specified format. + + :param mesh_list: List of Mesh objects to export + :param output_path: Output file path + :param format: Export format (auto-detected from extension if None) + :param options: Format-specific export options + """ + path = Path(output_path) + + # Auto-detect format from extension if not specified + if format is None: + format = ExportFormat.from_extension(path.suffix) + + # Determine if file should be opened in binary or text mode + binary_formats = {ExportFormat.OBJ, ExportFormat.STL, ExportFormat.VTK} + text_formats = {ExportFormat.GEOJSON} + + if format in binary_formats: + with path.open("wb") as f: + if format == ExportFormat.OBJ: + export_obj(mesh_list, f) + elif format == ExportFormat.STL: + export_stl(mesh_list, f, options) + elif format == ExportFormat.VTK: + export_vtk(mesh_list, f, options) + elif format in text_formats: + with path.open("w", encoding="utf-8") as f: + if format == ExportFormat.GEOJSON: + export_geojson(mesh_list, f, options) + else: + raise ValueError(f"Unsupported format: {format}") + + +# UI Helper Functions + + +def supported_formats() -> List[str]: + """ + Get list of supported export formats.
+ + :return: List of format names (e.g., ['obj', 'geojson', 'vtk', 'stl']) + """ + return ExportFormat.all_extensions() + + +def format_description(format: Union[str, ExportFormat]) -> str: + """ + Get human-readable description of a format. + + :param format: Format name or ExportFormat enum + :return: Description string + """ + if isinstance(format, str): + format = ExportFormat.from_extension(format) + + descriptions = { + ExportFormat.OBJ: "Wavefront OBJ - 3D geometry format (triangles and lines)", + ExportFormat.GEOJSON: "GeoJSON - Geographic data format (lines and polygons)", + ExportFormat.VTK: "VTK Legacy - Visualization Toolkit format", + ExportFormat.STL: "STL - Stereolithography format (triangles only)", + } + return descriptions.get(format, "Unknown format") + + +def format_filter_string(format: Union[str, ExportFormat]) -> str: + """ + Get file filter string for UI dialogs (Qt, tkinter, etc.). + + :param format: Format name or ExportFormat enum + :return: Filter string (e.g., "OBJ Files (*.obj)") + """ + if isinstance(format, str): + format = ExportFormat.from_extension(format) + + filters = { + ExportFormat.OBJ: "OBJ Files (*.obj)", + ExportFormat.GEOJSON: "GeoJSON Files (*.geojson)", + ExportFormat.VTK: "VTK Files (*.vtk)", + ExportFormat.STL: "STL Files (*.stl)", + } + return filters.get(format, "All Files (*.*)") + + +def all_formats_filter_string() -> str: + """ + Get file filter string for all supported formats. + Useful for Qt QFileDialog or similar UI components. + + :return: Filter string with all formats + """ + filters = [format_filter_string(fmt) for fmt in ExportFormat] + return ";;".join(filters) + + +def get_format_options_class(format: Union[str, ExportFormat]) -> Optional[type]: + """ + Get the options class for a specific format. + + :param format: Format name or ExportFormat enum + :return: Options class or None if no options available + """ + if isinstance(format, str): + format = ExportFormat.from_extension(format) + + options_map = { + ExportFormat.STL: STLExportOptions, + ExportFormat.VTK: VTKExportOptions, + ExportFormat.GEOJSON: GeoJSONExportOptions, + } + return options_map.get(format) + + +def supports_lines(format: Union[str, ExportFormat]) -> bool: + """ + Check if format supports line primitives. + + :param format: Format name or ExportFormat enum + :return: True if format supports lines + """ + if isinstance(format, str): + format = ExportFormat.from_extension(format) + + return format in {ExportFormat.OBJ, ExportFormat.GEOJSON, ExportFormat.VTK} + + +def supports_triangles(format: Union[str, ExportFormat]) -> bool: + """ + Check if format supports triangle primitives. 
+ + :param format: Format name or ExportFormat enum + :return: True if format supports triangles + """ + # All formats support triangles + return True diff --git a/energyml-utils/src/energyml/utils/data/helper.py b/energyml-utils/src/energyml/utils/data/helper.py index febba46..9ebde1d 100644 --- a/energyml-utils/src/energyml/utils/data/helper.py +++ b/energyml-utils/src/energyml/utils/data/helper.py @@ -5,13 +5,14 @@ import sys from typing import Any, Optional, Callable, List, Union +from energyml.utils.storage_interface import EnergymlStorageInterface import numpy as np from .datasets_io import read_external_dataset_array from ..constants import flatten_concatenation -from ..epc import get_obj_identifier from ..exception import ObjectNotFoundNotError from ..introspection import ( + get_obj_uri, snake_case, get_object_attribute_no_verif, search_attribute_matching_name_with_path, @@ -21,7 +22,7 @@ get_object_attribute, get_object_attribute_rgx, ) -from ..workspace import EnergymlWorkspace + from .datasets_io import get_path_in_external_with_path _ARRAY_NAMES_ = [ @@ -86,20 +87,29 @@ def is_z_reversed(crs: Optional[Any]) -> bool: """ reverse_z_values = False if crs is not None: - # resqml 201 - zincreasing_downward = search_attribute_matching_name(crs, "ZIncreasingDownward") - if len(zincreasing_downward) > 0: - reverse_z_values = zincreasing_downward[0] - - # resqml >= 22 - vert_axis = search_attribute_matching_name(crs, "VerticalAxis.Direction") - if len(vert_axis) > 0: - vert_axis_str = str(vert_axis[0]) - if "." in vert_axis_str: - vert_axis_str = vert_axis_str.split(".")[-1] - - reverse_z_values = vert_axis_str.lower() == "down" - + if "VerticalCrs" in type(crs).__name__: + vert_axis = search_attribute_matching_name(crs, "Direction") + if len(vert_axis) > 0: + vert_axis_str = str(vert_axis[0]) + if "." in vert_axis_str: + vert_axis_str = vert_axis_str.split(".")[-1] + + reverse_z_values = vert_axis_str.lower() == "down" + else: + # resqml 201 + zincreasing_downward = search_attribute_matching_name(crs, "ZIncreasingDownward") + if len(zincreasing_downward) > 0: + reverse_z_values = zincreasing_downward[0] + + # resqml >= 22 + vert_axis = search_attribute_matching_name(crs, "VerticalAxis.Direction") + if len(vert_axis) > 0: + vert_axis_str = str(vert_axis[0]) + if "." 
in vert_axis_str: + vert_axis_str = vert_axis_str.split(".")[-1] + + reverse_z_values = vert_axis_str.lower() == "down" + logging.debug(f"is_z_reversed: {reverse_z_values}") return reverse_z_values @@ -114,7 +124,7 @@ def get_vertical_epsg_code(crs_object: Any): return vertical_epsg_code -def get_projected_epsg_code(crs_object: Any, workspace: Optional[EnergymlWorkspace] = None): +def get_projected_epsg_code(crs_object: Any, workspace: Optional[EnergymlStorageInterface] = None): if crs_object is not None: # LocalDepth3dCRS projected_epsg_code = get_object_attribute_rgx(crs_object, "ProjectedCrs.EpsgCode") if projected_epsg_code is None: # LocalEngineering2DCrs @@ -130,7 +140,7 @@ def get_projected_epsg_code(crs_object: Any, workspace: Optional[EnergymlWorkspa return None -def get_projected_uom(crs_object: Any, workspace: Optional[EnergymlWorkspace] = None): +def get_projected_uom(crs_object: Any, workspace: Optional[EnergymlStorageInterface] = None): if crs_object is not None: projected_epsg_uom = get_object_attribute_rgx(crs_object, "ProjectedUom") if projected_epsg_uom is None: @@ -144,7 +154,7 @@ def get_projected_uom(crs_object: Any, workspace: Optional[EnergymlWorkspace] = return None -def get_crs_origin_offset(crs_obj: Any) -> List[float]: +def get_crs_origin_offset(crs_obj: Any) -> List[float | int]: """ Return a list [X,Y,Z] corresponding to the crs Offset [XOffset/OriginProjectedCoordinate1, ... ] depending on the crs energyml version. @@ -163,12 +173,12 @@ def get_crs_origin_offset(crs_obj: Any) -> List[float]: if tmp_offset_z is None: tmp_offset_z = get_object_attribute_rgx(crs_obj, "OriginProjectedCoordinate3") - crs_point_offset = [0, 0, 0] + crs_point_offset = [0.0, 0.0, 0.0] try: crs_point_offset = [ - float(tmp_offset_x) if tmp_offset_x is not None else 0, - float(tmp_offset_y) if tmp_offset_y is not None else 0, - float(tmp_offset_z) if tmp_offset_z is not None else 0, + float(tmp_offset_x) if tmp_offset_x is not None else 0.0, + float(tmp_offset_y) if tmp_offset_y is not None else 0.0, + float(tmp_offset_z) if tmp_offset_z is not None else 0.0, ] except Exception as e: logging.info(f"ERR reading crs offset {e}") @@ -183,30 +193,66 @@ def prod_n_tab(val: Union[float, int, str], tab: List[Union[float, int, str]]): :param tab: :return: """ - return list(map(lambda x: x * val, tab)) + if val is None: + return [None] * len(tab) + logging.debug(f"Multiplying list by {val}: {tab}") + # Convert to numpy array for vectorized operations, handling None values + arr = np.array(tab, dtype=object) + logging.debug(f"arr: {arr}") + # Create mask for non-None values + mask = arr != None # noqa: E711 + # Create result array filled with None + result = np.full(len(tab), None, dtype=object) + logging.debug(f"result before multiplication: {result}") + # Multiply only non-None values + result[mask] = arr[mask].astype(float) * val + logging.debug(f"result after multiplication: {result}") + return result.tolist() def sum_lists(l1: List, l2: List): """ - Sums 2 lists values. + Sums 2 lists values, preserving None values. 
Example: [1,1,1] and [2,2,3,6] gives : [3,3,4,6] + [1,None,3] and [2,2,3] gives : [3,None,6] :param l1: :param l2: :return: """ - return [l1[i] + l2[i] for i in range(min(len(l1), len(l2)))] + max(l1, l2, key=len)[ - min(len(l1), len(l2)) : # noqa: E203 - ] + min_len = min(len(l1), len(l2)) + + # Convert to numpy arrays for vectorized operations + arr1 = np.array(l1[:min_len], dtype=object) + arr2 = np.array(l2[:min_len], dtype=object) + + # Create result array + result = np.full(min_len, None, dtype=object) + + # Find indices where both values are not None + mask = (arr1 != None) & (arr2 != None) # noqa: E711 + + # Sum only where both are not None + if np.any(mask): + result[mask] = arr1[mask].astype(float) + arr2[mask].astype(float) + + # Convert back to list and append remaining elements from longer list + result_list = result.tolist() + if len(l1) > min_len: + result_list.extend(l1[min_len:]) + elif len(l2) > min_len: + result_list.extend(l2[min_len:]) + + return result_list def get_crs_obj( context_obj: Any, path_in_root: Optional[str] = None, root_obj: Optional[Any] = None, - workspace: Optional[EnergymlWorkspace] = None, + workspace: Optional[EnergymlStorageInterface] = None, ) -> Optional[Any]: """ Search for the CRS object related to :param:`context_obj` into the :param:`workspace` @@ -222,12 +268,12 @@ def get_crs_obj( crs_list = search_attribute_matching_name(context_obj, r"\.*Crs", search_in_sub_obj=True, deep_search=False) if crs_list is not None and len(crs_list) > 0: # logging.debug(crs_list[0]) - crs = workspace.get_object_by_identifier(get_obj_identifier(crs_list[0])) + crs = workspace.get_object(get_obj_uri(crs_list[0])) if crs is None: crs = workspace.get_object_by_uuid(get_obj_uuid(crs_list[0])) if crs is None: logging.error(f"CRS {crs_list[0]} not found (or not read correctly)") - raise ObjectNotFoundNotError(get_obj_identifier(crs_list[0])) + raise ObjectNotFoundNotError(get_obj_uri(crs_list[0])) if crs is not None: return crs @@ -293,9 +339,9 @@ def read_external_array( energyml_array: Any, root_obj: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: List[int] = None, -) -> Union[List[Any], np.ndarray]: + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, +) -> Optional[Union[List[Any], np.ndarray]]: """ Read an external array (BooleanExternalArray, BooleanHdf5Array, DoubleHdf5Array, IntegerHdf5Array, StringExternalArray ...) :param energyml_array: @@ -333,10 +379,11 @@ def read_external_array( ) if sub_indices is not None and len(sub_indices) > 0: - res = [] - for idx in sub_indices: - res.append(array[idx]) - array = res + if isinstance(array, np.ndarray): + array = array[sub_indices] + elif isinstance(array, list): + # Fallback for non-numpy arrays + array = [array[idx] for idx in sub_indices] return array @@ -357,9 +404,9 @@ def read_array( energyml_array: Any, root_obj: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: List[int] = None, -) -> List[Any]: + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, +) -> Union[List[Any], np.ndarray]: """ Read an array and return a list. The array is read depending on its type. see. 
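For reference, a minimal standalone sketch of what the None-preserving helpers patched above are meant to compute; this is a plain-Python equivalent with illustrative values only, and the _ref names are not part of the patch:

# Illustrative sketch: plain-Python equivalents of the patched prod_n_tab / sum_lists.
from typing import List, Optional, Union

Number = Union[int, float]

def prod_n_tab_ref(val: Optional[Number], tab: List[Optional[Number]]) -> List[Optional[Number]]:
    # Multiply every non-None entry by val; None entries are kept as None.
    if val is None:
        return [None] * len(tab)
    return [None if x is None else x * val for x in tab]

def sum_lists_ref(l1: List[Optional[Number]], l2: List[Optional[Number]]) -> List[Optional[Number]]:
    # Element-wise sum over the common length, keeping None when either operand is None,
    # then append the tail of the longer list unchanged.
    n = min(len(l1), len(l2))
    head = [None if (a is None or b is None) else a + b for a, b in zip(l1[:n], l2[:n])]
    return head + (l1[n:] if len(l1) > len(l2) else l2[n:])

assert prod_n_tab_ref(2, [1, None, 3]) == [2, None, 6]
assert sum_lists_ref([1, 1, 1], [2, 2, 3, 6]) == [3, 3, 4, 6]
assert sum_lists_ref([1, None, 3], [2, 2, 3]) == [3, None, 6]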
:py:func:`energyml.utils.data.helper.get_supported_array` :param energyml_array: @@ -393,8 +440,8 @@ def read_constant_array( energyml_array: Any, root_obj: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: Optional[List[int]] = None, + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ) -> List[Any]: """ Read a constant array ( BooleanConstantArray, DoubleConstantArray, FloatingPointConstantArray, IntegerConstantArray ...) @@ -423,9 +470,9 @@ def read_xml_array( energyml_array: Any, root_obj: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: List[int] = None, -) -> List[Any]: + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, +) -> Union[List[Any], np.ndarray]: """ Read a xml array ( BooleanXmlArray, FloatingPointXmlArray, IntegerXmlArray, StringXmlArray ...) :param energyml_array: @@ -439,10 +486,11 @@ def read_xml_array( # count = get_object_attribute_no_verif(energyml_array, "count_per_value") if sub_indices is not None and len(sub_indices) > 0: - res = [] - for idx in sub_indices: - res.append(values[idx]) - values = res + if isinstance(values, np.ndarray): + values = values[sub_indices] + elif isinstance(values, list): + # Use list comprehension for efficiency + values = [values[idx] for idx in sub_indices] return values @@ -450,8 +498,8 @@ def read_jagged_array( energyml_array: Any, root_obj: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: List[int] = None, + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ) -> List[Any]: """ Read a jagged array @@ -465,27 +513,23 @@ def read_jagged_array( elements = read_array( energyml_array=get_object_attribute_no_verif(energyml_array, "elements"), root_obj=root_obj, - path_in_root=path_in_root + ".elements", + path_in_root=(path_in_root or "") + ".elements", workspace=workspace, ) cumulative_length = read_array( energyml_array=read_array(get_object_attribute_no_verif(energyml_array, "cumulative_length")), root_obj=root_obj, - path_in_root=path_in_root + ".cumulative_length", + path_in_root=(path_in_root or "") + ".cumulative_length", workspace=workspace, ) - array = [] - previous = 0 - for cl in cumulative_length: - array.append(elements[previous:cl]) - previous = cl + # Use list comprehension for better performance + array = [ + elements[cumulative_length[i - 1] if i > 0 else 0 : cumulative_length[i]] for i in range(len(cumulative_length)) + ] if sub_indices is not None and len(sub_indices) > 0: - res = [] - for idx in sub_indices: - res.append(array[idx]) - array = res + array = [array[idx] for idx in sub_indices] return array @@ -493,8 +537,8 @@ def read_int_double_lattice_array( energyml_array: Any, root_obj: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: List[int] = None, + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ): """ Read DoubleLatticeArray or IntegerLatticeArray. 
@@ -505,27 +549,33 @@ def read_int_double_lattice_array( :param sub_indices: :return: """ - # start_value = get_object_attribute_no_verif(energyml_array, "start_value") + start_value = get_object_attribute_no_verif(energyml_array, "start_value") offset = get_object_attribute_no_verif(energyml_array, "offset") - # result = [] + result = [] + + if len(offset) == 1: + # 1D lattice array: offset is a single DoubleConstantArray or IntegerConstantArray + offset_obj = offset[0] + + # Get the offset value and count from the ConstantArray + offset_value = get_object_attribute_no_verif(offset_obj, "value") + count = get_object_attribute_no_verif(offset_obj, "count") - # if len(offset) == 1: - # pass - # elif len(offset) == 2: - # pass - # else: - raise Exception(f"{type(energyml_array)} read with an offset of length {len(offset)} is not supported") + # Generate the 1D array: start_value + i * offset_value for i in range(count) + result = [start_value + i * offset_value for i in range(count)] + else: + raise Exception(f"{type(energyml_array)} read with an offset of length {len(offset)} is not supported") - # return result + return result def read_point3d_zvalue_array( energyml_array: Any, root_obj: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: List[int] = None, + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ): """ Read a Point3D2ValueArray @@ -540,7 +590,7 @@ def read_point3d_zvalue_array( sup_geom_array = read_array( energyml_array=supporting_geometry, root_obj=root_obj, - path_in_root=path_in_root + ".SupportingGeometry", + path_in_root=(path_in_root or "") + ".SupportingGeometry", workspace=workspace, sub_indices=sub_indices, ) @@ -550,21 +600,32 @@ def read_point3d_zvalue_array( read_array( energyml_array=zvalues, root_obj=root_obj, - path_in_root=path_in_root + ".ZValues", + path_in_root=(path_in_root or "") + ".ZValues", workspace=workspace, sub_indices=sub_indices, ) ) - count = 0 + # Use NumPy for vectorized operation if possible + error_logged = False - for i in range(len(sup_geom_array)): - try: - sup_geom_array[i][2] = zvalues_array[i] - except Exception as e: - if count == 0: - logging.error(e, f": {i} is out of bound of {len(zvalues_array)}") - count = count + 1 + if isinstance(sup_geom_array, np.ndarray) and isinstance(zvalues_array, np.ndarray): + # Vectorized assignment for NumPy arrays + min_len = min(len(sup_geom_array), len(zvalues_array)) + if min_len < len(sup_geom_array): + logging.warning( + f"Z-values array ({len(zvalues_array)}) is shorter than geometry array ({len(sup_geom_array)}), only updating first {min_len} values" + ) + sup_geom_array[:min_len, 2] = zvalues_array[:min_len] + else: + # Fallback for list-based arrays + for i in range(len(sup_geom_array)): + try: + sup_geom_array[i][2] = zvalues_array[i] + except (IndexError, TypeError) as e: + if not error_logged: + logging.error(f"{type(e).__name__}: index {i} is out of bound of {len(zvalues_array)}") + error_logged = True return sup_geom_array @@ -573,8 +634,8 @@ def read_point3d_from_representation_lattice_array( energyml_array: Any, root_obj: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: List[int] = None, + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ): """ Read a Point3DFromRepresentationLatticeArray. 
@@ -588,11 +649,9 @@ def read_point3d_from_representation_lattice_array( :param sub_indices: :return: """ - supporting_rep_identifier = get_obj_identifier( - get_object_attribute_no_verif(energyml_array, "supporting_representation") - ) + supporting_rep_identifier = get_obj_uri(get_object_attribute_no_verif(energyml_array, "supporting_representation")) # logging.debug(f"energyml_array : {energyml_array}\n\t{supporting_rep_identifier}") - supporting_rep = workspace.get_object_by_identifier(supporting_rep_identifier) + supporting_rep = workspace.get_object(supporting_rep_identifier) if workspace is not None else None # TODO look for a pattern \.*patch\.*.[d]+ to find the patch number in path_in_root, then read that patch # logging.debug(f"path_in_root {path_in_root}") @@ -616,15 +675,15 @@ def read_grid2d_patch( patch: Any, grid2d: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: List[int] = None, -) -> List: + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, +) -> Union[List, np.ndarray]: points_path, points_obj = search_attribute_matching_name_with_path(patch, "Geometry.Points")[0] return read_array( energyml_array=points_obj, root_obj=grid2d, - path_in_root=path_in_root + "." + points_path, + path_in_root=path_in_root + "." + points_path if path_in_root else points_path, workspace=workspace, sub_indices=sub_indices, ) @@ -634,8 +693,8 @@ def read_point3d_lattice_array( energyml_array: Any, root_obj: Optional[Any] = None, path_in_root: Optional[str] = None, - workspace: Optional[EnergymlWorkspace] = None, - sub_indices: List[int] = None, + workspace: Optional[EnergymlStorageInterface] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ) -> List: """ Read a Point3DLatticeArray.
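The read_point3d_zvalue_array change above replaces the per-point loop with a single NumPy column assignment when both inputs are ndarrays. A toy sketch of that path, with made-up sizes so the truncation case is visible:

import numpy as np

sup_geom_array = np.zeros((5, 3))             # supporting geometry: 5 points (x, y, z)
zvalues_array = np.array([10.0, 11.0, 12.0])  # deliberately shorter than the geometry

min_len = min(len(sup_geom_array), len(zvalues_array))
sup_geom_array[:min_len, 2] = zvalues_array[:min_len]  # vectorized z-column update
assert sup_geom_array[:3, 2].tolist() == [10.0, 11.0, 12.0]
assert sup_geom_array[3:, 2].tolist() == [0.0, 0.0]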
@@ -661,14 +720,14 @@ def read_point3d_lattice_array( obj=energyml_array, name_rgx="slowestAxisCount", root_obj=root_obj, - current_path=path_in_root, + current_path=path_in_root or "", ) crs_fa_count = search_attribute_in_upper_matching_name( obj=energyml_array, name_rgx="fastestAxisCount", root_obj=root_obj, - current_path=path_in_root, + current_path=path_in_root or "", ) crs = None @@ -695,7 +754,11 @@ def read_point3d_lattice_array( slowest_size = len(slowest_table) fastest_size = len(fastest_table) - if len(crs_sa_count) > 0 and len(crs_fa_count) > 0: + logging.debug(f"slowest vector: {slowest_vec}, spacing: {slowest_spacing}, size: {slowest_size}") + logging.debug(f"fastest vector: {fastest_vec}, spacing: {fastest_spacing}, size: {fastest_size}") + logging.debug(f"origin: {origin}, zincreasing_downward: {zincreasing_downward}") + + if crs_sa_count is not None and len(crs_sa_count) > 0 and crs_fa_count is not None and len(crs_fa_count) > 0: if (crs_sa_count[0] == fastest_size and crs_fa_count[0] == slowest_size) or ( crs_sa_count[0] == fastest_size - 1 and crs_fa_count[0] == slowest_size - 1 ): @@ -712,40 +775,74 @@ def read_point3d_lattice_array( slowest_size = crs_sa_count[0] fastest_size = crs_fa_count[0] - for i in range(slowest_size): - for j in range(fastest_size): - previous_value = origin - # to avoid a sum of the parts of the array at each iteration, I take the previous value in the same line - # number i and add the fastest_table[j] value - - if j > 0: - if i > 0: - line_idx = i * fastest_size # numero de ligne - previous_value = result[line_idx + j - 1] - else: - previous_value = result[j - 1] - if zincreasing_downward: - result.append(sum_lists(previous_value, slowest_table[i - 1])) - else: - result.append(sum_lists(previous_value, fastest_table[j - 1])) - else: - if i > 0: - prev_line_idx = (i - 1) * fastest_size # numero de ligne precedent - previous_value = result[prev_line_idx] - if zincreasing_downward: - result.append(sum_lists(previous_value, fastest_table[j - 1])) + # Vectorized approach using NumPy for massive performance improvement + try: + # Convert tables to NumPy arrays + origin_arr = np.array(origin, dtype=float) + slowest_arr = np.array(slowest_table, dtype=float) # shape: (slowest_size, 3) + fastest_arr = np.array(fastest_table, dtype=float) # shape: (fastest_size, 3) + + # Compute cumulative sums + slowest_cumsum = np.cumsum(slowest_arr, axis=0) # cumulative offset along slowest axis + fastest_cumsum = np.cumsum(fastest_arr, axis=0) # cumulative offset along fastest axis + + # Create meshgrid indices + i_indices, j_indices = np.meshgrid(np.arange(slowest_size), np.arange(fastest_size), indexing="ij") + + # Initialize result array + result_arr = np.zeros((slowest_size, fastest_size, 3), dtype=float) + result_arr[:, :, :] = origin_arr # broadcast origin to all positions + + # Add offsets based on zincreasing_downward + if zincreasing_downward: + # Add slowest offsets where i > 0 + result_arr[1:, :, :] += slowest_cumsum[:-1, np.newaxis, :] + # Add fastest offsets where j > 0 + result_arr[:, 1:, :] += fastest_cumsum[np.newaxis, :-1, :] + else: + # Add fastest offsets where j > 0 + result_arr[:, 1:, :] += fastest_cumsum[np.newaxis, :-1, :] + # Add slowest offsets where i > 0 + result_arr[1:, :, :] += slowest_cumsum[:-1, np.newaxis, :] + + # Flatten to list of points + result = result_arr.reshape(-1, 3).tolist() + + except (ValueError, TypeError) as e: + # Fallback to original implementation if NumPy conversion fails + logging.warning(f"NumPy vectorization 
failed ({e}), falling back to iterative approach") + for i in range(slowest_size): + for j in range(fastest_size): + previous_value = origin + + if j > 0: + if i > 0: + line_idx = i * fastest_size + previous_value = result[line_idx + j - 1] else: + previous_value = result[j - 1] + if zincreasing_downward: result.append(sum_lists(previous_value, slowest_table[i - 1])) + else: + result.append(sum_lists(previous_value, fastest_table[j - 1])) else: - result.append(previous_value) + if i > 0: + prev_line_idx = (i - 1) * fastest_size + previous_value = result[prev_line_idx] + if zincreasing_downward: + result.append(sum_lists(previous_value, fastest_table[j - 1])) + else: + result.append(sum_lists(previous_value, slowest_table[i - 1])) + else: + result.append(previous_value) else: raise Exception(f"{type(energyml_array)} read with an offset of length {len(offset)} is not supported") if sub_indices is not None and len(sub_indices) > 0: - res = [] - for idx in sub_indices: - res.append(result[idx]) - result = res + if isinstance(result, np.ndarray): + result = result[sub_indices].tolist() + else: + result = [result[idx] for idx in sub_indices] return result @@ -754,6 +851,6 @@ def read_point3d_lattice_array( # energyml_array: Any, # root_obj: Optional[Any] = None, # path_in_root: Optional[str] = None, -# workspace: Optional[EnergymlWorkspace] = None +# workspace: Optional[EnergymlStorageInterface] = None # ): # logging.debug(energyml_array) diff --git a/energyml-utils/src/energyml/utils/data/mesh.py b/energyml-utils/src/energyml/utils/data/mesh.py index 3ee9409..108da7e 100644 --- a/energyml-utils/src/energyml/utils/data/mesh.py +++ b/energyml-utils/src/energyml/utils/data/mesh.py @@ -16,25 +16,47 @@ from .helper import ( read_array, read_grid2d_patch, - EnergymlWorkspace, get_crs_obj, get_crs_origin_offset, is_z_reversed, ) -from ..epc import Epc, get_obj_identifier, gen_energyml_object_path -from ..epc_stream import EpcStreamReader -from ..exception import ObjectNotFoundNotError -from ..introspection import ( +from energyml.utils.epc import gen_energyml_object_path +from energyml.utils.epc_stream import EpcStreamReader +from energyml.utils.exception import NotSupportedError, ObjectNotFoundNotError +from energyml.utils.introspection import ( + get_obj_uri, search_attribute_matching_name, search_attribute_matching_name_with_path, snake_case, get_object_attribute, + get_object_attribute_rgx, ) +from energyml.utils.storage_interface import EnergymlStorageInterface + + +# Import export functions from new export module for backward compatibility +from .export import export_obj as _export_obj_new _FILE_HEADER: bytes = b"# file exported by energyml-utils python module (Geosiris)\n" Point = list[float] +# ============================ +# TODO : + +# obj_GridConnectionSetRepresentation +# obj_IjkGridRepresentation +# obj_PlaneSetRepresentation +# obj_RepresentationSetRepresentation +# obj_SealedSurfaceFrameworkRepresentation +# obj_SealedVolumeFrameworkRepresentation +# obj_SubRepresentation +# obj_UnstructuredGridRepresentation +# obj_WellboreMarkerFrameRepresentation +# obj_WellboreTrajectoryRepresentation + +# ============================ + class MeshFileFormat(Enum): OFF = "off" @@ -77,12 +99,12 @@ class AbstractMesh: crs_object: Any = field(default=None) - point_list: List[Point] = field( + point_list: Union[List[Point], np.ndarray] = field( default_factory=list, ) identifier: str = field( - default=None, + default="", ) def get_nb_edges(self) -> int: @@ -91,7 +113,7 @@ def get_nb_edges(self) -> int: 
def get_nb_faces(self) -> int: return 0 - def get_indices(self) -> List[List[int]]: + def get_indices(self) -> Union[List[List[int]], np.ndarray]: return [] @@ -102,7 +124,7 @@ class PointSetMesh(AbstractMesh): @dataclass class PolylineSetMesh(AbstractMesh): - line_indices: List[List[int]] = field( + line_indices: Union[List[List[int]], np.ndarray] = field( default_factory=list, ) @@ -112,13 +134,13 @@ def get_nb_edges(self) -> int: def get_nb_faces(self) -> int: return 0 - def get_indices(self) -> List[List[int]]: + def get_indices(self) -> Union[List[List[int]], np.ndarray]: return self.line_indices @dataclass class SurfaceMesh(AbstractMesh): - faces_indices: List[List[int]] = field( + faces_indices: Union[List[List[int]], np.ndarray] = field( default_factory=list, ) @@ -128,7 +150,7 @@ def get_nb_edges(self) -> int: def get_nb_faces(self) -> int: return len(self.faces_indices) - def get_indices(self) -> List[List[int]]: + def get_indices(self) -> Union[List[List[int]], np.ndarray]: return self.faces_indices @@ -145,7 +167,7 @@ def crs_displacement(points: List[Point], crs_obj: Any) -> Tuple[List[Point], Po if crs_point_offset != [0, 0, 0]: for p in points: for xyz in range(len(p)): - p[xyz] = p[xyz] + crs_point_offset[xyz] + p[xyz] = (p[xyz] + crs_point_offset[xyz]) if p[xyz] is not None else None if zincreasing_downward and len(p) >= 3: p[2] = -p[2] @@ -178,9 +200,9 @@ def _mesh_name_mapping(array_type_name: str) -> str: def read_mesh_object( energyml_object: Any, - workspace: Optional[EnergymlWorkspace] = None, + workspace: Optional[EnergymlStorageInterface] = None, use_crs_displacement: bool = False, - sub_indices: List[int] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ) -> List[AbstractMesh]: """ Read and "meshable" object. If :param:`energyml_object` is not supported, an exception will be raised. 
@@ -190,28 +212,44 @@ def read_mesh_object( is used to translate the data with the CRS offsets :return: """ + if isinstance(energyml_object, list): return energyml_object array_type_name = _mesh_name_mapping(type(energyml_object).__name__) reader_func = get_mesh_reader_function(array_type_name) if reader_func is not None: + # logging.info(f"using function {reader_func} to read type {array_type_name}") surfaces: List[AbstractMesh] = reader_func( energyml_object=energyml_object, workspace=workspace, sub_indices=sub_indices ) - if use_crs_displacement: + if ( + use_crs_displacement and "wellbore" not in array_type_name.lower() + ): # WellboreFrameRep has allready the displacement applied + # TODO: the displacement should be done in each reader function to manage specific cases for s in surfaces: + print("CRS : ", s.crs_object.uuid if s.crs_object is not None else "None") crs_displacement(s.point_list, s.crs_object) return surfaces else: - logging.error(f"Type {array_type_name} is not supported: function read_{snake_case(array_type_name)} not found") - raise Exception( - f"Type {array_type_name} is not supported\n\t{energyml_object}: \n\tfunction read_{snake_case(array_type_name)} not found" + # logging.error(f"Type {array_type_name} is not supported: function read_{snake_case(array_type_name)} not found") + raise NotSupportedError( + f"Type {array_type_name} is not supported\n\tfunction read_{snake_case(array_type_name)} not found" ) +def read_ijk_grid_representation( + energyml_object: Any, + workspace: EnergymlStorageInterface, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, +) -> List[Any]: + raise NotSupportedError("IJKGrid representation reading is not supported yet.") + + def read_point_representation( - energyml_object: Any, workspace: EnergymlWorkspace, sub_indices: List[int] = None + energyml_object: Any, + workspace: EnergymlStorageInterface, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ) -> List[PointSetMesh]: # pt_geoms = search_attribute_matching_type(point_set, "AbstractGeometry") @@ -273,7 +311,9 @@ def read_point_representation( def read_polyline_representation( - energyml_object: Any, workspace: EnergymlWorkspace, sub_indices: List[int] = None + energyml_object: Any, + workspace: EnergymlStorageInterface, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ) -> List[PolylineSetMesh]: # pt_geoms = search_attribute_matching_type(point_set, "AbstractGeometry") @@ -364,7 +404,7 @@ def read_polyline_representation( if len(points) > 0: meshes.append( PolylineSetMesh( - identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", + identifier=f"{get_obj_uri(energyml_object)}_patch{patch_idx}", energyml_object=energyml_object, crs_object=crs, point_list=points, @@ -381,9 +421,9 @@ def gen_surface_grid_geometry( energyml_object: Any, patch: Any, patch_path: Any, - workspace: Optional[EnergymlWorkspace] = None, + workspace: Optional[EnergymlStorageInterface] = None, keep_holes=False, - sub_indices: List[int] = None, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, offset: int = 0, ): points = read_grid2d_patch( @@ -392,6 +432,8 @@ def gen_surface_grid_geometry( path_in_root=patch_path, workspace=workspace, ) + logging.debug(f"Total points read: {len(points)}") + logging.debug(f"Sample points: {points[0:5]}") fa_count = search_attribute_matching_name(patch, "FastestAxisCount") if fa_count is None: @@ -430,7 +472,7 @@ def gen_surface_grid_geometry( sa_count = sa_count + 1 fa_count = fa_count + 1 - # logging.debug(f"sa_count 
{sa_count} fa_count {fa_count} : {sa_count*fa_count} - {len(points)} ") + logging.debug(f"sa_count {sa_count} fa_count {fa_count} : {sa_count * fa_count} - {len(points)} ") for sa in range(sa_count - 1): for fa in range(fa_count - 1): @@ -478,7 +520,10 @@ def gen_surface_grid_geometry( def read_grid2d_representation( - energyml_object: Any, workspace: Optional[EnergymlWorkspace] = None, keep_holes=False, sub_indices: List[int] = None + energyml_object: Any, + workspace: Optional[EnergymlStorageInterface] = None, + keep_holes=False, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ) -> List[SurfaceMesh]: # h5_reader = HDF5FileReader() meshes = [] @@ -516,7 +561,7 @@ def read_grid2d_representation( meshes.append( SurfaceMesh( - identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", + identifier=f"{get_obj_uri(energyml_object)}_patch{patch_idx}", energyml_object=energyml_object, crs_object=crs, point_list=points, @@ -555,7 +600,7 @@ def read_grid2d_representation( ) meshes.append( SurfaceMesh( - identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", + identifier=f"{get_obj_uri(energyml_object)}_patch{patch_idx}", energyml_object=energyml_object, crs_object=crs, point_list=points, @@ -568,8 +613,8 @@ def read_grid2d_representation( def read_triangulated_set_representation( energyml_object: Any, - workspace: EnergymlWorkspace, - sub_indices: List[int] = None, + workspace: EnergymlStorageInterface, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ) -> List[SurfaceMesh]: meshes = [] @@ -634,7 +679,7 @@ def read_triangulated_set_representation( total_size = total_size + len(triangles_list) meshes.append( SurfaceMesh( - identifier=f"{get_obj_identifier(energyml_object)}_patch{patch_idx}", + identifier=f"{get_obj_uri(energyml_object)}_patch{patch_idx}", energyml_object=energyml_object, crs_object=crs, point_list=point_list, @@ -647,19 +692,167 @@ def read_triangulated_set_representation( return meshes +def read_wellbore_frame_representation( + energyml_object: Any, + workspace: EnergymlStorageInterface, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, +) -> List[PolylineSetMesh]: + """ + Read a WellboreFrameRepresentation and construct a polyline mesh from the trajectory. 
+ + :param energyml_object: The WellboreFrameRepresentation object + :param workspace: The EnergymlStorageInterface to access related objects + :param sub_indices: Optional list of indices to filter specific nodes + :return: List containing a single PolylineSetMesh representing the wellbore + """ + meshes = [] + + try: + # Read measured depths (NodeMd) + md_array = [] + try: + node_md_path, node_md_obj = search_attribute_matching_name_with_path(energyml_object, "NodeMd")[0] + md_array = read_array( + energyml_array=node_md_obj, + root_obj=energyml_object, + path_in_root=node_md_path, + workspace=workspace, + ) + if not isinstance(md_array, list): + md_array = md_array.tolist() if hasattr(md_array, "tolist") else list(md_array) + except (IndexError, AttributeError) as e: + logging.warning(f"Could not read NodeMd from wellbore frame: {e}") + return meshes + + # Get trajectory reference + trajectory_dor = search_attribute_matching_name(obj=energyml_object, name_rgx="Trajectory")[0] + trajectory_identifier = get_obj_uri(trajectory_dor) + trajectory_obj = workspace.get_object(trajectory_identifier) + + if trajectory_obj is None: + logging.error(f"Trajectory {trajectory_identifier} not found") + return meshes + + # CRS + crs = None + + # Get reference point (wellhead location) - try different attribute paths for different versions + head_x, head_y, head_z = 0.0, 0.0, 0.0 + z_is_up = True # Default assumption + + try: + # Try to get MdDatum (RESQML 2.0.1) or MdInterval.Datum (RESQML 2.2+) + md_datum_dor = None + try: + md_datum_dor = search_attribute_matching_name(obj=trajectory_obj, name_rgx=r"MdDatum")[0] + except IndexError: + try: + md_datum_dor = search_attribute_matching_name(obj=trajectory_obj, name_rgx=r"MdInterval.Datum")[0] + except IndexError: + pass + + if md_datum_dor is not None: + md_datum_identifier = get_obj_uri(md_datum_dor) + md_datum_obj = workspace.get_object(md_datum_identifier) + + if md_datum_obj is not None: + # Try to get coordinates from ReferencePointInACrs + try: + head_x = get_object_attribute_rgx(md_datum_obj, r"HorizontalCoordinates.Coordinate1") or 0.0 + head_y = get_object_attribute_rgx(md_datum_obj, r"HorizontalCoordinates.Coordinate2") or 0.0 + head_z = get_object_attribute_rgx(md_datum_obj, "VerticalCoordinate") or 0.0 + + # Get vertical CRS to determine z direction + try: + vcrs_dor = search_attribute_matching_name(obj=md_datum_obj, name_rgx="VerticalCrs")[0] + vcrs_identifier = get_obj_uri(vcrs_dor) + vcrs_obj = workspace.get_object(vcrs_identifier) + + if vcrs_obj is not None: + z_is_up = not is_z_reversed(vcrs_obj) + except (IndexError, AttributeError): + pass + except AttributeError: + pass + # Get CRS from trajectory geometry if available + try: + geometry_paths = search_attribute_matching_name_with_path(md_datum_obj, r"VerticalCrs") + if len(geometry_paths) > 0: + crs_dor_path, crs_dor = geometry_paths[0] + crs_identifier = get_obj_uri(crs_dor) + crs = workspace.get_object(crs_identifier) + except Exception as e: + logging.debug(f"Could not get CRS from trajectory: {e}") + except Exception as e: + logging.debug(f"Could not get reference point from trajectory: {e}") + + # Build wellbore path points - simple vertical projection from measured depths + # Note: This is a simplified representation. For accurate 3D trajectory, + # you would need to interpolate along the trajectory's control points. 
+ points = [] + line_indices = [] + + for i, md in enumerate(md_array): + # Create point at (head_x, head_y, head_z +/- md) + # Apply z direction based on CRS + z_offset = md if z_is_up else -md + points.append([head_x, head_y, head_z + z_offset]) + + # Connect consecutive points + if i > 0: + line_indices.append([i - 1, i]) + + # Apply sub_indices filter if provided + if sub_indices is not None and len(sub_indices) > 0: + filtered_points = [] + filtered_indices = [] + index_map = {} + + for new_idx, old_idx in enumerate(sub_indices): + if 0 <= old_idx < len(points): + filtered_points.append(points[old_idx]) + index_map[old_idx] = new_idx + + for line in line_indices: + if line[0] in index_map and line[1] in index_map: + filtered_indices.append([index_map[line[0]], index_map[line[1]]]) + + points = filtered_points + line_indices = filtered_indices + + if len(points) > 0: + meshes.append( + PolylineSetMesh( + identifier=f"{get_obj_uri(energyml_object)}_wellbore", + energyml_object=energyml_object, + crs_object=crs, + point_list=points, + line_indices=line_indices, + ) + ) + + except Exception as e: + logging.error(f"Failed to read wellbore frame representation: {e}") + import traceback + + traceback.print_exc() + + return meshes + + def read_sub_representation( energyml_object: Any, - workspace: EnergymlWorkspace, - sub_indices: List[int] = None, + workspace: EnergymlStorageInterface, + sub_indices: Optional[Union[List[int], np.ndarray]] = None, ) -> List[AbstractMesh]: supporting_rep_dor = search_attribute_matching_name( obj=energyml_object, name_rgx=r"(SupportingRepresentation|RepresentedObject)" )[0] - supporting_rep_identifier = get_obj_identifier(supporting_rep_dor) - supporting_rep = workspace.get_object_by_identifier(supporting_rep_identifier) + supporting_rep_identifier = get_obj_uri(supporting_rep_dor) + supporting_rep = workspace.get_object(supporting_rep_identifier) total_size = 0 - all_indices = [] + all_indices = None for patch_path, patch_indices in search_attribute_matching_name_with_path( obj=energyml_object, name_rgx="SubRepresentationPatch.\\d+.ElementIndices.\\d+.Indices", @@ -690,7 +883,7 @@ def read_sub_representation( else: total_size = total_size + len(array) - all_indices = all_indices + array + all_indices = all_indices + array if all_indices is not None else array meshes = read_mesh_object( energyml_object=supporting_rep, workspace=workspace, @@ -698,7 +891,7 @@ def read_sub_representation( ) for m in meshes: - m.identifier = f"sub representation {get_obj_identifier(energyml_object)} of {m.identifier}" + m.identifier = f"sub representation {get_obj_uri(energyml_object)} of {m.identifier}" return meshes @@ -1250,31 +1443,17 @@ def export_obj(mesh_list: List[AbstractMesh], out: BytesIO, obj_name: Optional[s """ Export an :class:`AbstractMesh` into obj format. + This function is maintained for backward compatibility and delegates to the + export module. For new code, consider importing from energyml.utils.data.export. + Each AbstractMesh from the list :param:`mesh_list` will be placed into its own group. 
:param mesh_list: :param out: :param obj_name: :return: """ - out.write("# Generated by energyml-utils a Geosiris python module\n\n".encode("utf-8")) - - if obj_name is not None: - out.write(f"o {obj_name}\n\n".encode("utf-8")) - - point_offset = 0 - for m in mesh_list: - out.write(f"g {m.identifier}\n\n".encode("utf-8")) - _export_obj_elt( - off_point_part=out, - off_face_part=out, - points=m.point_list, - indices=m.get_indices(), - point_offset=point_offset, - colors=[], - elt_letter="l" if isinstance(m, PolylineSetMesh) else "f", - ) - point_offset = point_offset + len(m.point_list) - out.write("\n".encode("utf-8")) + # Delegate to the new export module + _export_obj_new(mesh_list, out, obj_name) def _export_obj_elt( diff --git a/energyml-utils/src/energyml/utils/epc.py b/energyml-utils/src/energyml/utils/epc.py index 28e7c1b..e44fe22 100644 --- a/energyml-utils/src/energyml/utils/epc.py +++ b/energyml-utils/src/energyml/utils/epc.py @@ -30,6 +30,7 @@ Keywords1, TargetMode, ) +from energyml.utils.storage_interface import DataArrayMetadata, EnergymlStorageInterface, ResourceMetadata import numpy as np from .uri import Uri, parse_uri from xsdata.formats.dataclass.models.generics import DerivedElement @@ -87,12 +88,11 @@ read_energyml_json_bytes, JSON_VERSION, ) -from .workspace import EnergymlWorkspace from .xml import is_energyml_content_type @dataclass -class Epc(EnergymlWorkspace): +class Epc(EnergymlStorageInterface): """ A class that represent an EPC file content """ @@ -125,6 +125,8 @@ class Epc(EnergymlWorkspace): default_factory=list, ) + force_h5_path: Optional[str] = field(default=None) + """ Additional rels for objects. Key is the object (same than in @energyml_objects) and value is a list of RelationShip. This can be used to link an HDF5 to an ExternalPartReference in resqml 2.0.1 @@ -429,6 +431,10 @@ def get_h5_file_paths(self, obj: Any) -> List[str]: Get all HDF5 file paths referenced in the EPC file (from rels to external resources) :return: list of HDF5 file paths """ + + if self.force_h5_path is not None: + return [self.force_h5_path] + is_uri = (isinstance(obj, str) and parse_uri(obj) is not None) or isinstance(obj, Uri) if is_uri: obj = self.get_object_by_identifier(obj) @@ -452,8 +458,6 @@ def get_h5_file_paths(self, obj: Any) -> List[str]: h5_paths.add(possible_h5_path) return list(h5_paths) - # -- Functions inherited from EnergymlWorkspace - def get_object_as_dor(self, identifier: str, dor_qualified_type) -> Optional[Any]: """ Search an object by its identifier and returns a DOR @@ -487,8 +491,8 @@ def get_object_by_identifier(self, identifier: Union[str, Uri]) -> Optional[Any] return o return None - def get_object(self, uuid: str, object_version: Optional[str]) -> Optional[Any]: - return self.get_object_by_identifier(f"{uuid}.{object_version or ''}") + def get_object(self, identifier: Union[str, Uri]) -> Optional[Any]: + return self.get_object_by_identifier(identifier) def add_object(self, obj: Any) -> bool: """ @@ -634,11 +638,12 @@ def write_array( # Class methods @classmethod - def read_file(cls, epc_file_path: str): + def read_file(cls, epc_file_path: str) -> "Epc": with open(epc_file_path, "rb") as f: epc = cls.read_stream(BytesIO(f.read())) epc.epc_file_path = epc_file_path return epc + raise IOError(f"Failed to open EPC file {epc_file_path}") @classmethod def read_stream(cls, epc_file_io: BytesIO): # returns an Epc instance @@ -770,6 +775,45 @@ def read_stream(cls, epc_file_io: BytesIO): # returns an Epc instance return None + def list_objects(self, 
dataspace: str | None = None, object_type: str | None = None) -> List[ResourceMetadata]: + result = [] + for obj in self.energyml_objects: + if (dataspace is None or get_obj_type(get_obj_usable_class(obj)) == dataspace) and ( + object_type is None or get_qualified_type_from_class(type(obj)) == object_type + ): + res_meta = ResourceMetadata( + uri=str(get_obj_uri(obj)), + uuid=get_obj_uuid(obj), + title=get_object_attribute(obj, "citation.title") or "", + object_type=type(obj).__name__, + version=get_obj_version(obj), + content_type=get_content_type_from_class(type(obj)) or "", + ) + result.append(res_meta) + return result + + def put_object(self, obj: Any, dataspace: str | None = None) -> str | None: + if self.add_object(obj): + return str(get_obj_uri(obj)) + return None + + def delete_object(self, identifier: Union[str, Any]) -> bool: + obj = self.get_object_by_identifier(identifier) + if obj is not None: + self.remove_object(identifier) + return True + return False + + def get_array_metadata( + self, proxy: str | Uri | Any, path_in_external: str | None = None + ) -> DataArrayMetadata | List[DataArrayMetadata] | None: + array = self.read_array(proxy=proxy, path_in_external=path_in_external) + if array is not None: + if isinstance(array, np.ndarray): + return DataArrayMetadata.from_numpy_array(path_in_resource=path_in_external, array=array) + elif isinstance(array, list): + return DataArrayMetadata.from_list(path_in_resource=path_in_external, data=array) + def dumps_epc_content_and_files_lists(self) -> str: """ Dumps the EPC content and files lists for debugging purposes. @@ -782,6 +826,13 @@ def dumps_epc_content_and_files_lists(self) -> str: return "EPC Content:\n" + "\n".join(content_list) + "\n\nRaw Files:\n" + "\n".join(raw_files_list) + def close(self) -> None: + """ + Close the EPC file and release any resources. 
+ :return: + """ + pass + # ______ __ ____ __ _ # / ____/___ ___ _________ ___ ______ ___ / / / __/_ ______ _____/ /_(_)___ ____ _____ diff --git a/energyml-utils/src/energyml/utils/epc_stream.py b/energyml-utils/src/energyml/utils/epc_stream.py index 721f9d6..bad61ec 100644 --- a/energyml-utils/src/energyml/utils/epc_stream.py +++ b/energyml-utils/src/energyml/utils/epc_stream.py @@ -21,12 +21,19 @@ from energyml.opc.opc import Types, Override, CoreProperties, Relationships, Relationship from energyml.utils.data.datasets_io import HDF5FileReader, HDF5FileWriter +from energyml.utils.storage_interface import DataArrayMetadata, EnergymlStorageInterface, ResourceMetadata from energyml.utils.uri import Uri, parse_uri -from energyml.utils.workspace import EnergymlWorkspace +import h5py import numpy as np -from .constants import EPCRelsRelationshipType, OptimizedRegex, EpcExportVersion -from .epc import Epc, gen_energyml_object_path, gen_rels_path, get_epc_content_type_path -from .introspection import ( +from energyml.utils.constants import ( + EPCRelsRelationshipType, + OptimizedRegex, + EpcExportVersion, + content_type_to_qualified_type, +) +from energyml.utils.epc import Epc, gen_energyml_object_path, gen_rels_path, get_epc_content_type_path + +from energyml.utils.introspection import ( get_class_from_content_type, get_obj_content_type, get_obj_identifier, @@ -36,7 +43,7 @@ get_obj_type, get_obj_usable_class, ) -from .serialization import read_energyml_xml_bytes, serialize_xml +from energyml.utils.serialization import read_energyml_xml_bytes, serialize_xml from .xml import is_energyml_content_type @@ -48,8 +55,8 @@ class EpcObjectMetadata: object_type: str content_type: str file_path: str - version: Optional[str] = None identifier: Optional[str] = None + version: Optional[str] = None def __post_init__(self): if self.identifier is None: @@ -79,7 +86,7 @@ def memory_efficiency(self) -> float: return (1 - (self.loaded_objects / self.total_objects)) * 100 if self.total_objects > 0 else 100.0 -class EpcStreamReader(EnergymlWorkspace): +class EpcStreamReader(EnergymlStorageInterface): """ Memory-efficient EPC file reader with lazy loading and smart caching. @@ -110,6 +117,8 @@ def __init__( preload_metadata: bool = True, export_version: EpcExportVersion = EpcExportVersion.CLASSIC, force_h5_path: Optional[str] = None, + keep_open: bool = False, + force_title_load: bool = False, ): """ Initialize the EPC stream reader. @@ -121,11 +130,16 @@ def __init__( preload_metadata: Whether to preload all object metadata export_version: EPC packaging version (CLASSIC or EXPANDED) force_h5_path: Optional forced HDF5 file path for external resources. If set, all arrays will be read/written from/to this path. + keep_open: If True, keeps the ZIP file open for better performance with multiple operations. File is closed only when instance is deleted or close() is called. + force_title_load: If True, forces loading object titles when listing objects (may impact performance) """ self.epc_file_path = Path(epc_file_path) self.cache_size = cache_size self.validate_on_load = validate_on_load self.force_h5_path = force_h5_path + self.cache_opened_h5 = None + self.keep_open = keep_open + self.force_title_load = force_title_load is_new_file = False @@ -145,7 +159,7 @@ def __init__( with zipfile.ZipFile(self.epc_file_path, "r") as zf: content_types_path = get_epc_content_type_path() if content_types_path not in zf.namelist(): - logging.info(f"EPC file is missing required structure. 
Initializing empty EPC file.") + logging.info("EPC file is missing required structure. Initializing empty EPC file.") self._create_empty_epc() is_new_file = True except Exception as e: @@ -166,6 +180,7 @@ def __init__( # File handle management self._zip_file: Optional[zipfile.ZipFile] = None + self._persistent_zip: Optional[zipfile.ZipFile] = None # Used when keep_open=True # EPC export version detection self.export_version: EpcExportVersion = export_version or EpcExportVersion.CLASSIC # Default @@ -179,6 +194,10 @@ def __init__( # Detect EPC version after loading metadata self.export_version = self._detect_epc_version() + # Open persistent ZIP file if keep_open is enabled + if self.keep_open and not is_new_file: + self._persistent_zip = zipfile.ZipFile(self.epc_file_path, "r") + def _create_empty_epc(self) -> None: """Create an empty EPC file structure.""" # Ensure directory exists @@ -218,14 +237,22 @@ def _load_metadata(self) -> None: @contextmanager def _get_zip_file(self) -> Iterator[zipfile.ZipFile]: - """Context manager for ZIP file access with proper resource management.""" - zf = None - try: - zf = zipfile.ZipFile(self.epc_file_path, "r") - yield zf - finally: - if zf is not None: - zf.close() + """Context manager for ZIP file access with proper resource management. + + If keep_open is True, uses the persistent connection. Otherwise opens a new one. + """ + if self.keep_open and self._persistent_zip is not None: + # Use persistent connection, don't close it + yield self._persistent_zip + else: + # Open and close per request + zf = None + try: + zf = zipfile.ZipFile(self.epc_file_path, "r") + yield zf + finally: + if zf is not None: + zf.close() def _read_content_types(self, zf: zipfile.ZipFile) -> Types: """Read and parse [Content_Types].xml file.""" @@ -507,6 +534,9 @@ def get_object_by_uuid(self, uuid: str) -> List[Any]: return objects + def get_object(self, identifier: Union[str, Uri]) -> Optional[Any]: + return self.get_object_by_identifier(identifier) + def get_objects_by_type(self, object_type: str) -> List[Any]: """Get all objects of the specified type.""" if object_type not in self._type_index: @@ -539,6 +569,88 @@ def get_statistics(self) -> EpcStreamingStats: """Get current streaming statistics.""" return self.stats + def list_objects( + self, dataspace: Optional[str] = None, object_type: Optional[str] = None + ) -> List[ResourceMetadata]: + """ + List all objects with metadata (EnergymlStorageInterface method). 
+ + Args: + dataspace: Optional dataspace filter (ignored for EPC files) + object_type: Optional type filter (qualified type) + + Returns: + List of ResourceMetadata for all matching objects + """ + + results = [] + metadata_list = self.list_object_metadata(object_type) + + for meta in metadata_list: + try: + # Load object to get title + title = "" + if self.force_title_load: + obj = self.get_object_by_identifier(meta.identifier) + if obj and hasattr(obj, "citation") and obj.citation: + if hasattr(obj.citation, "title"): + title = obj.citation.title + + # Build URI + qualified_type = content_type_to_qualified_type(meta.content_type) + if meta.version: + uri = f"eml:///{qualified_type}(uuid={meta.uuid},version='{meta.version}')" + else: + uri = f"eml:///{qualified_type}({meta.uuid})" + + resource = ResourceMetadata( + uri=uri, + uuid=meta.uuid, + version=meta.version, + title=title, + object_type=meta.object_type, + content_type=meta.content_type, + ) + + results.append(resource) + except Exception: + continue + + return results + + def get_array_metadata( + self, proxy: Union[str, Uri, Any], path_in_external: Optional[str] = None + ) -> Union[DataArrayMetadata, List[DataArrayMetadata], None]: + """ + Get metadata for data array(s) (EnergymlStorageInterface method). + + Args: + proxy: The object identifier/URI or the object itself + path_in_external: Optional specific path + + Returns: + DataArrayMetadata if path specified, List[DataArrayMetadata] if no path, + or None if not found + """ + from energyml.utils.storage_interface import DataArrayMetadata + + try: + if path_in_external: + array = self.read_array(proxy, path_in_external) + if array is not None: + return DataArrayMetadata( + path_in_resource=path_in_external, + array_type=str(array.dtype), + dimensions=list(array.shape), + ) + else: + # Would need to scan all possible paths - not practical + return [] + except Exception: + pass + + return None + def preload_objects(self, identifiers: List[str]) -> int: """ Preload specific objects into cache. 
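A short usage sketch of the new listing and array-metadata methods on EpcStreamReader; the file name "data.epc" and the dataset path "RESQML/points" are placeholders, not files or paths shipped with the library:

from energyml.utils.epc_stream import EpcStreamReader

with EpcStreamReader("data.epc", force_title_load=True) as reader:
    resources = reader.list_objects()
    for res in resources:
        print(res.uri, res.object_type, res.title)

    if resources:
        # Metadata for one array referenced by the first listed object (path is hypothetical)
        meta = reader.get_array_metadata(resources[0].uri, path_in_external="RESQML/points")
        if meta is not None and not isinstance(meta, list):
            print(meta.array_type, meta.dimensions, meta.size)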
@@ -659,12 +771,19 @@ def read_array(self, proxy: Union[str, Uri, Any], path_in_external: str) -> Opti :return: the dataset as a numpy array """ # Resolve proxy to object - if isinstance(proxy, (str, Uri)): - obj = self.get_object_by_identifier(proxy) + + h5_path = [] + if self.force_h5_path is not None: + if self.cache_opened_h5 is None: + self.cache_opened_h5 = h5py.File(self.force_h5_path, "a") + h5_path = [self.cache_opened_h5] else: - obj = proxy + if isinstance(proxy, (str, Uri)): + obj = self.get_object_by_identifier(proxy) + else: + obj = proxy - h5_path = self.get_h5_file_paths(obj) + h5_path = self.get_h5_file_paths(obj) h5_reader = HDF5FileReader() @@ -688,13 +807,18 @@ def write_array(self, proxy: Union[str, Uri, Any], path_in_external: str, array: return: True if successful """ - # Resolve proxy to object - if isinstance(proxy, (str, Uri)): - obj = self.get_object_by_identifier(proxy) + h5_path = [] + if self.force_h5_path is not None: + if self.cache_opened_h5 is None: + self.cache_opened_h5 = h5py.File(self.force_h5_path, "a") + h5_path = [self.cache_opened_h5] else: - obj = proxy + if isinstance(proxy, (str, Uri)): + obj = self.get_object_by_identifier(proxy) + else: + obj = proxy - h5_path = self.get_h5_file_paths(obj) + h5_path = self.get_h5_file_paths(obj) h5_writer = HDF5FileWriter() @@ -772,6 +896,71 @@ def __enter__(self): def __exit__(self, exc_type, exc_val, exc_tb): """Context manager exit with cleanup.""" self.clear_cache() + self.close() + if self.cache_opened_h5 is not None: + try: + self.cache_opened_h5.close() + except Exception: + pass + self.cache_opened_h5 = None + + def __del__(self): + """Destructor to ensure persistent ZIP file is closed.""" + try: + self.close() + if self.cache_opened_h5 is not None: + try: + self.cache_opened_h5.close() + except Exception: + pass + self.cache_opened_h5 = None + except Exception: + pass # Ignore errors during cleanup + + def close(self) -> None: + """Close the persistent ZIP file if it's open, recomputing rels first.""" + # Recompute all relationships before closing to ensure consistency + try: + self.rebuild_all_rels(clean_first=True) + except Exception as e: + logging.warning(f"Error rebuilding rels on close: {e}") + + if self._persistent_zip is not None: + try: + self._persistent_zip.close() + except Exception as e: + logging.debug(f"Error closing persistent ZIP file: {e}") + finally: + self._persistent_zip = None + + def _reopen_persistent_zip(self) -> None: + """Reopen persistent ZIP file after modifications to reflect changes. + + This is called after any operation that modifies the EPC file to ensure + that subsequent reads see the updated content. + """ + if self.keep_open and self._persistent_zip is not None: + try: + self._persistent_zip.close() + except Exception: + pass + self._persistent_zip = zipfile.ZipFile(self.epc_file_path, "r") + + def put_object(self, obj: Any, dataspace: Optional[str] = None) -> Optional[str]: + """ + Store an energyml object (EnergymlStorageInterface method). 
+ + Args: + obj: The energyml object to store + dataspace: Optional dataspace name (ignored for EPC files) + + Returns: + The identifier of the stored object (UUID.version or UUID), or None on error + """ + try: + return self.add_object(obj, replace_if_exists=True) + except Exception: + return None def add_object(self, obj: Any, file_path: Optional[str] = None, replace_if_exists: bool = True) -> str: """ @@ -867,6 +1056,18 @@ def add_object(self, obj: Any, file_path: Optional[str] = None, replace_if_exist self._rollback_add_object(identifier) raise RuntimeError(f"Failed to add object to EPC: {e}") + def delete_object(self, identifier: Union[str, Uri]) -> bool: + """ + Delete an object by its identifier (EnergymlStorageInterface method). + + Args: + identifier: Object identifier (UUID or UUID.version) or ETP URI + + Returns: + True if successfully deleted, False otherwise + """ + return self.remove_object(identifier) + def remove_object(self, identifier: Union[str, Uri]) -> bool: """ Remove an object (or all versions of an object) from the EPC file and update caches. @@ -913,7 +1114,15 @@ def remove_object(self, identifier: Union[str, Uri]) -> bool: raise RuntimeError(f"Failed to remove object from EPC: {e}") def _remove_single_object(self, identifier: str) -> bool: - """Remove a single object by its full identifier.""" + """ + Remove a single object by its full identifier. + The rels files of other objects referencing this object are NOT updated. You must update them manually (or close the epc, the rels are regenerated on epc close). + Args: + identifier: The full identifier (uuid.version) of the object to remove + Returns: + True if the object was successfully removed, False otherwise + + """ try: if identifier not in self._metadata: return False @@ -1038,6 +1247,7 @@ def add_rels_for_object(self, identifier: Union[str, Uri, Any], relationships: L target_zip, ) shutil.move(temp_path, self.epc_file_path) + self._reopen_persistent_zip() def _compute_object_rels(self, obj: Any, obj_identifier: str) -> List[Relationship]: """ @@ -1304,9 +1514,13 @@ def _add_object_to_file(self, obj: Any, metadata: EpcObjectMetadata) -> None: # Update .rels files by merging with existing ones read from source updated_rels_paths = self._update_rels_files(obj, metadata, source_zip, target_zip) - # Copy all existing files except [Content_Types].xml and rels we'll update + # Copy all existing files except [Content_Types].xml, the object file, and rels we already updated for item in source_zip.infolist(): - if item.filename == get_epc_content_type_path() or item.filename in updated_rels_paths: + if ( + item.filename == get_epc_content_type_path() + or item.filename == metadata.file_path + or item.filename in updated_rels_paths + ): continue data = source_zip.read(item.filename) target_zip.writestr(item, data) @@ -1317,6 +1531,7 @@ def _add_object_to_file(self, obj: Any, metadata: EpcObjectMetadata) -> None: # Replace original file with updated version shutil.move(temp_path, self.epc_file_path) + self._reopen_persistent_zip() except Exception as e: # Clean up temp file on error @@ -1352,6 +1567,7 @@ def _remove_object_from_file(self, metadata: EpcObjectMetadata) -> None: # Replace original file with updated version shutil.move(temp_path, self.epc_file_path) + self._reopen_persistent_zip() except Exception: # Clean up temp file on error @@ -1686,6 +1902,7 @@ def rebuild_all_rels(self, clean_first: bool = True) -> Dict[str, int]: # Replace original file shutil.move(temp_path, self.epc_file_path) + 
self._reopen_persistent_zip() logging.info( f"Rebuilt .rels files: processed {stats['objects_processed']} objects, " diff --git a/energyml-utils/src/energyml/utils/exception.py b/energyml-utils/src/energyml/utils/exception.py index 87e128c..fac041f 100644 --- a/energyml-utils/src/energyml/utils/exception.py +++ b/energyml-utils/src/energyml/utils/exception.py @@ -39,3 +39,10 @@ def __init__(self, t: Optional[str] = None): class UnparsableFile(Exception): def __init__(self, t: Optional[str] = None): super().__init__("File is not parsable for an EPC file. Please use RawFile class for non energyml files.") + + +class NotSupportedError(Exception): + """Exception for not supported features""" + + def __init__(self, msg): + super().__init__(msg) diff --git a/energyml-utils/src/energyml/utils/introspection.py b/energyml-utils/src/energyml/utils/introspection.py index e764eba..db23fed 100644 --- a/energyml-utils/src/energyml/utils/introspection.py +++ b/energyml-utils/src/energyml/utils/introspection.py @@ -233,6 +233,8 @@ def get_module_name_and_type_from_content_or_qualified_type(cqt: str) -> Tuple[s ct = parse_qualified_type(cqt) except AttributeError: pass + if ct is None: + raise ValueError(f"Cannot parse content-type or qualified-type: {cqt}") domain = ct.group("domain") if domain is None: @@ -281,6 +283,10 @@ def get_module_name(domain: str, domain_version: str): return f"energyml.{domain}.{domain_version}.{ns[ns.rindex('/') + 1:]}" +# Track modules that failed to import to avoid duplicate logging +_FAILED_IMPORT_MODULES = set() + + def import_related_module(energyml_module_name: str) -> None: """ Import related modules for a specific energyml module. (See. :const:`RELATED_MODULES`) @@ -292,8 +298,11 @@ def import_related_module(energyml_module_name: str) -> None: for m in related: try: import_module(m) - except Exception: - pass + except Exception as e: + # Only log once per unique module + if m not in _FAILED_IMPORT_MODULES: + _FAILED_IMPORT_MODULES.add(m) + logging.debug(f"Could not import related module {m}: {e}") # logging.error(e) @@ -425,6 +434,10 @@ def get_object_attribute(obj: Any, attr_dot_path: str, force_snake_case=True) -> """ current_attrib_name, path_next = path_next_attribute(attr_dot_path) + if current_attrib_name is None: + logging.error(f"Attribute path '{attr_dot_path}' is invalid.") + return None + if force_snake_case: current_attrib_name = snake_case(current_attrib_name) @@ -517,6 +530,10 @@ def get_object_attribute_or_create( """ current_attrib_name, path_next = path_next_attribute(attr_dot_path) + if current_attrib_name is None: + logging.error(f"Attribute path '{attr_dot_path}' is invalid.") + return None + if force_snake_case: current_attrib_name = snake_case(current_attrib_name) @@ -552,6 +569,10 @@ def get_object_attribute_advanced(obj: Any, attr_dot_path: str) -> Any: current_attrib_name = get_matching_class_attribute_name(obj, current_attrib_name) + if current_attrib_name is None: + logging.error(f"Attribute path '{attr_dot_path}' is invalid.") + return None + value = None if isinstance(obj, list): value = obj[int(current_attrib_name)] @@ -587,9 +608,10 @@ def get_object_attribute_no_verif(obj: Any, attr_name: str, default: Optional[An else: raise AttributeError(obj, name=attr_name) else: - return ( - getattr(obj, attr_name) or default - ) # we did not used the "default" of getattr to keep raising AttributeError + res = getattr(obj, attr_name) + if res is None: # we did not used the "default" of getattr to keep raising AttributeError + return default + return 
res def get_object_attribute_rgx(obj: Any, attr_dot_path_rgx: str) -> Any: @@ -870,6 +892,9 @@ def search_attribute_matching_name_with_path( # current_match = attrib_list[0] # next_match = ".".join(attrib_list[1:]) current_match, next_match = path_next_attribute(name_rgx) + if current_match is None: + logging.error(f"Attribute name regex '{name_rgx}' is invalid.") + return [] res = [] if current_path is None: @@ -997,7 +1022,7 @@ def set_attribute_from_dict(obj: Any, values: Dict) -> None: set_attribute_from_path(obj=obj, attribute_path=k, value=v) -def set_attribute_from_path(obj: Any, attribute_path: str, value: Any): +def set_attribute_from_path(obj: Any, attribute_path: str, value: Any) -> None: """ Changes the value of a (sub)attribute. Example : @@ -1023,6 +1048,11 @@ def set_attribute_from_path(obj: Any, attribute_path: str, value: Any): """ upper = obj current_attrib_name, path_next = path_next_attribute(attribute_path) + + if current_attrib_name is None: + logging.error(f"Attribute path '{attribute_path}' is invalid.") + return + if path_next is not None: set_attribute_from_path( get_object_attribute( @@ -1066,12 +1096,12 @@ def set_attribute_from_path(obj: Any, attribute_path: str, value: Any): setattr(upper, current_attrib_name, value) -def set_attribute_value(obj: any, attribute_name_rgx, value: Any): +def set_attribute_value(obj: any, attribute_name_rgx, value: Any) -> None: copy_attributes(obj_in={attribute_name_rgx: value}, obj_out=obj, ignore_case=True) def copy_attributes( - obj_in: any, + obj_in: Any, obj_out: Any, only_existing_attributes: bool = True, ignore_case: bool = True, @@ -1081,7 +1111,7 @@ def copy_attributes( p_list = search_attribute_matching_name_with_path( obj=obj_out, name_rgx=k_in, - re_flags=re.IGNORECASE if ignore_case else 0, + re_flags=re.IGNORECASE if ignore_case else re.NOFLAG, deep_search=False, search_in_sub_obj=False, ) @@ -1337,7 +1367,7 @@ def get_qualified_type_from_class(cls: Union[type, Any], print_dev_version=True) return None -def get_object_uri(obj: any, dataspace: Optional[str] = None) -> Optional[Uri]: +def get_object_uri(obj: Any, dataspace: Optional[str] = None) -> Optional[Uri]: """Returns an ETP URI""" return parse_uri(f"eml:///dataspace('{dataspace or ''}')/{get_qualified_type_from_class(obj)}({get_obj_uuid(obj)})") @@ -1522,6 +1552,12 @@ def _gen_str_from_attribute_name(attribute_name: Optional[str], _parent_class: O :param _parent_class: :return: """ + if attribute_name is None: + return ( + "A random str (" + + str(random_value_from_class(int)) + + ") @_gen_str_from_attribute_name attribute 'attribute_name' was None" + ) attribute_name_lw = attribute_name.lower() if attribute_name is not None: if attribute_name_lw == "uuid" or attribute_name_lw == "uid": diff --git a/energyml-utils/src/energyml/utils/manager.py b/energyml-utils/src/energyml/utils/manager.py index 23933b3..10644ad 100644 --- a/energyml-utils/src/energyml/utils/manager.py +++ b/energyml-utils/src/energyml/utils/manager.py @@ -179,7 +179,7 @@ def get_class_pkg(cls): try: p = re.compile(RGX_ENERGYML_MODULE_NAME) match = p.search(cls.__module__) - return match.group("pkg") + return match.group("pkg") # type: ignore except AttributeError as e: logging.error(f"Exception to get class package for '{cls}'") raise e @@ -217,6 +217,8 @@ def reshape_version_from_regex_match( :param nb_digit: The number of digits to keep in the version. :return: The reshaped version string. 
""" + if match is None: + return "" return reshape_version(match.group("versionNumber"), nb_digit) + ( "dev" + match.group("versionDev") if match.group("versionDev") is not None and print_dev_version else "" ) diff --git a/energyml-utils/src/energyml/utils/storage_interface.py b/energyml-utils/src/energyml/utils/storage_interface.py new file mode 100644 index 0000000..d07299b --- /dev/null +++ b/energyml-utils/src/energyml/utils/storage_interface.py @@ -0,0 +1,362 @@ +# Copyright (c) 2023-2024 Geosiris. +# SPDX-License-Identifier: Apache-2.0 +""" +Unified Storage Interface Module + +This module provides a unified interface for reading and writing energyml objects and arrays, +abstracting away whether the data comes from an ETP server, a local EPC file, or an EPC stream reader. + +The storage interface enables applications to work with energyml data without knowing the +underlying storage mechanism, making it easy to switch between server-based and file-based +workflows. + +Key Components: +- EnergymlStorageInterface: Abstract base class defining the storage interface +- ResourceMetadata: Dataclass for object metadata (similar to ETP Resource) +- DataArrayMetadata: Dataclass for array metadata + +Example Usage: + ```python + from energyml.utils.storage_interface import create_storage + + # Use with EPC file + storage = create_storage("my_data.epc") + + # Same API for all implementations! + obj = storage.get_object("uuid.version") or storage.get_object("eml:///dataspace('default')/resqml22.TriangulatedSetRepresentation('uuid')") + metadata_list = storage.list_objects() + array = storage.read_array(obj, "values/0") + storage.put_object(new_obj) + storage.close() + ``` +""" + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Dict, List, Optional, Union, Tuple + +from energyml.utils.uri import Uri +import numpy as np + + +@dataclass +class ResourceMetadata: + """ + Metadata for an energyml object, similar to ETP Resource. + + This class provides a unified representation of object metadata across + different storage backends (EPC, EPC Stream, ETP). + """ + + uri: str + """URI of the resource (ETP-style uri or identifier)""" + + uuid: str + """Object UUID""" + + title: str + """Object title/name from citation""" + + object_type: str + """Qualified type (e.g., 'resqml20.obj_TriangulatedSetRepresentation')""" + + content_type: str + """Content type (e.g., 'application/x-resqml+xml;version=2.0;type=obj_TriangulatedSetRepresentation')""" + + version: Optional[str] = None + """Object version (optional)""" + + dataspace: Optional[str] = None + """Dataspace name (primarily for ETP)""" + + created: Optional[datetime] = None + """Creation timestamp""" + + last_changed: Optional[datetime] = None + """Last modification timestamp""" + + source_count: Optional[int] = None + """Number of source relationships (objects this references)""" + + target_count: Optional[int] = None + """Number of target relationships (objects referencing this)""" + + custom_data: Dict[str, Any] = field(default_factory=dict) + """Additional custom metadata""" + + @property + def identifier(self) -> str: + """Get object identifier (uuid.version or uuid if no version)""" + if self.version: + return f"{self.uuid}.{self.version}" + return self.uuid + + +@dataclass +class DataArrayMetadata: + """ + Metadata for a data array in an energyml object. 
+ + This provides information about arrays stored in HDF5 or other external storage, + similar to ETP DataArrayMetadata. + """ + + path_in_resource: Optional[str] + """Path to the array within the HDF5 file""" + + array_type: str + """Data type of the array (e.g., 'double', 'int', 'string')""" + + dimensions: List[int] + """Array dimensions/shape""" + + custom_data: Dict[str, Any] = field(default_factory=dict) + """Additional custom metadata""" + + @property + def size(self) -> int: + """Total number of elements in the array""" + result = 1 + for dim in self.dimensions: + result *= dim + return result + + @property + def ndim(self) -> int: + """Number of dimensions""" + return len(self.dimensions) + + @classmethod + def from_numpy_array(cls, path_in_resource: Optional[str], array: np.ndarray) -> "DataArrayMetadata": + """ + Create DataArrayMetadata from a numpy array. + + Args: + path_in_resource: Path to the array within the HDF5 file + array: Numpy array + Returns: + DataArrayMetadata instance + """ + return cls( + path_in_resource=path_in_resource, + array_type=str(array.dtype), + dimensions=list(array.shape), + ) + + @classmethod + def from_list(cls, path_in_resource: Optional[str], data: List[Any]) -> "DataArrayMetadata": + """ + Create DataArrayMetadata from a list. + + Args: + path_in_resource: Path to the array within the HDF5 file + data: List of data + Returns: + DataArrayMetadata instance + """ + array = np.array(data) + return cls.from_numpy_array(path_in_resource, array) + + +class EnergymlStorageInterface(ABC): + """ + Abstract base class for energyml data storage operations. + + This interface defines a common API for interacting with energyml objects and arrays, + regardless of whether they are stored on an ETP server, in a local EPC file, or in + a streaming EPC reader. + + All implementations must provide methods for: + - Getting, putting, and deleting energyml objects + - Reading and writing data arrays + - Getting array metadata + - Listing available objects with metadata + - Transaction support (where applicable) + - Closing the storage connection + """ + + @abstractmethod + def get_object(self, identifier: Union[str, Uri]) -> Optional[Any]: + """ + Retrieve an object by its identifier (UUID or UUID.version). + + Args: + identifier: Object identifier (UUID or UUID.version) or ETP URI + + Returns: + The deserialized energyml object, or None if not found + """ + pass + + @abstractmethod + def get_object_by_uuid(self, uuid: str) -> List[Any]: + """ + Retrieve all objects with the given UUID (all versions). + + Args: + uuid: Object UUID + + Returns: + List of objects with this UUID (may be empty) + """ + pass + + @abstractmethod + def put_object(self, obj: Any, dataspace: Optional[str] = None) -> Optional[str]: + """ + Store an energyml object. + + Args: + obj: The energyml object to store + dataspace: Optional dataspace name (primarily for ETP) + + Returns: + The identifier of the stored object (UUID.version or UUID), or None on error + """ + pass + + @abstractmethod + def delete_object(self, identifier: Union[str, Uri]) -> bool: + """ + Delete an object by its identifier. + + Args: + identifier: Object identifier (UUID or UUID.version) or ETP URI + + Returns: + True if successfully deleted, False otherwise + """ + pass + + @abstractmethod + def read_array(self, proxy: Union[str, Uri, Any], path_in_external: str) -> Optional[np.ndarray]: + """ + Read a data array from external storage (HDF5). 
+
+        Args:
+            proxy: The object identifier/URI or the object itself that references the array
+            path_in_external: Path within the HDF5 file (e.g., 'values/0')
+
+        Returns:
+            The data array as a numpy array, or None if not found
+        """
+        pass
+
+    @abstractmethod
+    def write_array(
+        self,
+        proxy: Union[str, Uri, Any],
+        path_in_external: str,
+        array: np.ndarray,
+    ) -> bool:
+        """
+        Write a data array to external storage (HDF5).
+
+        Args:
+            proxy: The object identifier/URI or the object itself that references the array
+            path_in_external: Path within the HDF5 file (e.g., 'values/0')
+            array: The numpy array to write
+
+        Returns:
+            True if successfully written, False otherwise
+        """
+        pass
+
+    @abstractmethod
+    def get_array_metadata(
+        self, proxy: Union[str, Uri, Any], path_in_external: Optional[str] = None
+    ) -> Union[DataArrayMetadata, List[DataArrayMetadata], None]:
+        """
+        Get metadata for data array(s).
+
+        Args:
+            proxy: The object identifier/URI or the object itself that references the array
+            path_in_external: Optional specific path. If None, returns all array metadata for the object
+
+        Returns:
+            DataArrayMetadata if path specified, List[DataArrayMetadata] if no path,
+            or None if not found
+        """
+        pass
+
+    @abstractmethod
+    def list_objects(
+        self, dataspace: Optional[str] = None, object_type: Optional[str] = None
+    ) -> List[ResourceMetadata]:
+        """
+        List all objects with their metadata.
+
+        Args:
+            dataspace: Optional dataspace filter (primarily for ETP)
+            object_type: Optional type filter (qualified type, e.g., 'resqml20.obj_Grid2dRepresentation')
+
+        Returns:
+            List of ResourceMetadata for all matching objects
+        """
+        pass
+
+    @abstractmethod
+    def close(self) -> None:
+        """
+        Close the storage connection and release resources.
+        """
+        pass
+
+    # Transaction support (optional, may raise NotImplementedError)
+
+    def start_transaction(self) -> bool:
+        """
+        Start a transaction (if supported).
+
+        Returns:
+            True if transaction started, False if not supported
+        """
+        raise NotImplementedError("Transactions not supported by this storage backend")
+
+    def commit_transaction(self) -> Tuple[bool, Optional[str]]:
+        """
+        Commit the current transaction (if supported).
+
+        Returns:
+            Tuple of (success, transaction_uuid)
+        """
+        raise NotImplementedError("Transactions not supported by this storage backend")
+
+    def rollback_transaction(self) -> bool:
+        """
+        Roll back the current transaction (if supported).
+
+        Returns:
+            True if rolled back successfully
+        """
+        raise NotImplementedError("Transactions not supported by this storage backend")
+
+    # Additional utility methods
+
+    def get_object_dependencies(self, identifier: Union[str, Uri]) -> List[str]:
+        """
+        Get the list of object identifiers that this object depends on (references).
+
+        Args:
+            identifier: Object identifier
+
+        Returns:
+            List of identifiers of objects this object references
+        """
+        raise NotImplementedError("Dependency tracking not implemented by this storage backend")
+
+    def __enter__(self):
+        """Context manager entry"""
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Context manager exit"""
+        self.close()
+
+
+__all__ = [
+    "EnergymlStorageInterface",
+    "ResourceMetadata",
+    "DataArrayMetadata",
+]
diff --git a/energyml-utils/src/energyml/utils/workspace.py b/energyml-utils/src/energyml/utils/workspace.py
deleted file mode 100644
index 8371644..0000000
--- a/energyml-utils/src/energyml/utils/workspace.py
+++ /dev/null
@@ -1,45 +0,0 @@
-# Copyright (c) 2023-2024 Geosiris.
-# SPDX-License-Identifier: Apache-2.0
-from abc import abstractmethod
-from dataclasses import dataclass
-from typing import Optional, Any, Union
-
-from energyml.utils.uri import Uri
-import numpy as np
-
-
-@dataclass
-class EnergymlWorkspace:
-    def get_object(self, uuid: str, object_version: Optional[str]) -> Optional[Any]:
-        raise NotImplementedError("EnergymlWorkspace.get_object")
-
-    def get_object_by_identifier(self, identifier: str) -> Optional[Any]:
-        _tmp = identifier.split(".")
-        return self.get_object(_tmp[0], _tmp[1] if len(_tmp) > 1 else None)
-
-    def get_object_by_uuid(self, uuid: str) -> Optional[Any]:
-        return self.get_object(uuid, None)
-
-    # def read_external_array(
-    #     self,
-    #     energyml_array: Any,
-    #     root_obj: Optional[Any] = None,
-    #     path_in_root: Optional[str] = None,
-    # ) -> List[Any]:
-    #     raise NotImplementedError("EnergymlWorkspace.get_object")
-
-    @abstractmethod
-    def add_object(self, obj: Any) -> bool:
-        raise NotImplementedError("EnergymlWorkspace.add_object")
-
-    @abstractmethod
-    def remove_object(self, identifier: Union[str, Uri]) -> None:
-        raise NotImplementedError("EnergymlWorkspace.remove_object")
-
-    @abstractmethod
-    def read_array(self, proxy: Union[str, Uri, Any], path_in_external: str) -> Optional[np.ndarray]:
-        raise NotImplementedError("EnergymlWorkspace.read_array")
-
-    @abstractmethod
-    def write_array(self, proxy: Union[str, Uri, Any], path_in_external: str, array: Any) -> bool:
-        raise NotImplementedError("EnergymlWorkspace.write_array")
diff --git a/energyml-utils/tests/test_epc.py b/energyml-utils/tests/test_epc.py
index 11626a8..de6ea53 100644
--- a/energyml-utils/tests/test_epc.py
+++ b/energyml-utils/tests/test_epc.py
@@ -9,13 +9,13 @@
 from energyml.resqml.v2_0_1.resqmlv2 import FaultInterpretation
 from energyml.resqml.v2_2.resqmlv2 import TriangulatedSetRepresentation
 
-from src.energyml.utils.epc import (
+from energyml.utils.epc import (
     as_dor,
     get_obj_identifier,
     gen_energyml_object_path,
     EpcExportVersion,
 )
-from src.energyml.utils.introspection import (
+from energyml.utils.introspection import (
     epoch_to_date,
     epoch,
     gen_uuid,
diff --git a/energyml-utils/tests/test_xml.py b/energyml-utils/tests/test_xml.py
index 4c454af..4bf1f67 100644
--- a/energyml-utils/tests/test_xml.py
+++ b/energyml-utils/tests/test_xml.py
@@ -3,6 +3,7 @@
 
 import logging
 
+from scripts.optimized_constants import parse_qualified_type
 from src.energyml.utils.xml import *
 
 CT_20 = "application/x-resqml+xml;version=2.0;type=obj_TriangulatedSetRepresentation"
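
For illustration, a minimal sketch of how the new DataArrayMetadata helpers behave; the HDF5 path and the array used here are made up for the example, everything else only uses the API added above:

```python
import numpy as np

from energyml.utils.storage_interface import DataArrayMetadata

# Build metadata directly from an in-memory array; dtype and shape are captured automatically.
points = np.zeros((128, 3), dtype=np.float64)
meta = DataArrayMetadata.from_numpy_array("RESQML/my-uuid/points_patch0", points)

assert meta.array_type == "float64"  # str(array.dtype)
assert meta.dimensions == [128, 3]   # list(array.shape)
assert meta.size == 384              # product of the dimensions
assert meta.ndim == 2
```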
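And a sketch of the backend-agnostic pattern the new interface enables: code written only against EnergymlStorageInterface works with any concrete storage. The summarize_storage helper is hypothetical, and the create_storage factory shown in the trailing comment is the one referenced by the module docstring rather than something defined in this file:

```python
from typing import Optional

import numpy as np

from energyml.utils.storage_interface import EnergymlStorageInterface


def summarize_storage(storage: EnergymlStorageInterface, object_type: Optional[str] = None) -> None:
    """Print every matching object and the shape of its external arrays."""
    for meta in storage.list_objects(object_type=object_type):
        print(f"{meta.object_type} '{meta.title}' ({meta.identifier})")

        # Without a path, get_array_metadata returns metadata for all arrays of the object (or None)
        arrays = storage.get_array_metadata(meta.identifier) or []
        for array_meta in arrays:
            print(f"  {array_meta.path_in_resource}: {array_meta.array_type} {array_meta.dimensions}")
            if array_meta.path_in_resource:
                values = storage.read_array(meta.identifier, array_meta.path_in_resource)
                if isinstance(values, np.ndarray) and values.size:
                    print(f"    min={values.min()} max={values.max()}")


# Usage with any concrete backend, e.g. via the create_storage factory mentioned in the module docstring:
# with create_storage("my_data.epc") as storage:
#     summarize_storage(storage, object_type="resqml22.TriangulatedSetRepresentation")
```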