Skip to content
2 changes: 1 addition & 1 deletion energyml-utils/.flake8
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[flake8]
# Ignore specific error codes (comma-separated list)
ignore = E501, E722, W503, F403, E203, E202
ignore = E501, E722, W503, F403, E203, E202, E402

# Max line length (default is 79, can be changed)
max-line-length = 120
Expand Down
8 changes: 8 additions & 0 deletions energyml-utils/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ sample/
gen*/
manip*
*.epc
*.h5
*.off
*.obj
*.log
Expand All @@ -54,6 +55,13 @@ manip*

*.xml
*.json
docs/*.md

# DATA
*.obj
*.geojson
*.vtk
*.stl


# WIP
Expand Down
209 changes: 209 additions & 0 deletions energyml-utils/example/epc_stream_keep_open_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#!/usr/bin/env python
# Copyright (c) 2023-2024 Geosiris.
# SPDX-License-Identifier: Apache-2.0
"""
Example demonstrating the keep_open feature of EpcStreamReader.

This example shows how using keep_open=True improves performance when
performing multiple operations on an EPC file by keeping the ZIP file
open instead of reopening it for each operation.
"""

import time
import sys
from pathlib import Path

# Add src directory to path
src_path = Path(__file__).parent.parent / "src"
sys.path.insert(0, str(src_path))

from energyml.utils.epc_stream import EpcStreamReader


def benchmark_without_keep_open(epc_path: str, num_operations: int = 10):
    """Benchmark repeated object reads with ``keep_open=False``.

    With ``keep_open=False`` the reader reopens the underlying ZIP archive
    for every read, which is the slow path this example contrasts against.

    :param epc_path: path to an existing EPC file.
    :param num_operations: requested number of read operations; capped by
        the number of objects present in the file.
    :return: elapsed wall-clock time in seconds, or 0 if the file is empty.
    """
    print(f"\nBenchmark WITHOUT keep_open ({num_operations} operations):")
    print("=" * 60)

    start = time.time()

    # Create reader without keep_open
    with EpcStreamReader(epc_path, keep_open=False, cache_size=5) as reader:
        metadata_list = reader.list_object_metadata()

        if not metadata_list:
            print(" No objects in EPC file")
            return 0

        # Cap at the number of available objects so the average below
        # reflects work actually performed.
        actual_operations = min(num_operations, len(metadata_list))
        for i in range(actual_operations):
            meta = metadata_list[i % len(metadata_list)]
            if meta.identifier:
                _ = reader.get_object_by_identifier(meta.identifier)
                if i == 0:
                    print(f" First object: {meta.object_type}")

    elapsed = time.time() - start
    print(f" Time: {elapsed:.4f}s")
    # BUGFIX: divide by the operations actually run, not the requested
    # count — the original under-reported the average for small files.
    print(f" Avg per operation: {elapsed / actual_operations:.4f}s")

    return elapsed


def benchmark_with_keep_open(epc_path: str, num_operations: int = 10):
    """Benchmark repeated object reads with ``keep_open=True``.

    With ``keep_open=True`` the reader keeps the underlying ZIP archive
    open across reads instead of reopening it per operation.

    :param epc_path: path to an existing EPC file.
    :param num_operations: requested number of read operations; capped by
        the number of objects present in the file.
    :return: elapsed wall-clock time in seconds, or 0 if the file is empty.
    """
    print(f"\nBenchmark WITH keep_open ({num_operations} operations):")
    print("=" * 60)

    start = time.time()

    # Create reader with keep_open
    with EpcStreamReader(epc_path, keep_open=True, cache_size=5) as reader:
        metadata_list = reader.list_object_metadata()

        if not metadata_list:
            print(" No objects in EPC file")
            return 0

        # Cap at the number of available objects so the average below
        # reflects work actually performed.
        actual_operations = min(num_operations, len(metadata_list))
        for i in range(actual_operations):
            meta = metadata_list[i % len(metadata_list)]
            if meta.identifier:
                _ = reader.get_object_by_identifier(meta.identifier)
                if i == 0:
                    print(f" First object: {meta.object_type}")

    elapsed = time.time() - start
    print(f" Time: {elapsed:.4f}s")
    # BUGFIX: divide by the operations actually run, not the requested
    # count — the original under-reported the average for small files.
    print(f" Avg per operation: {elapsed / actual_operations:.4f}s")

    return elapsed


def demonstrate_file_modification_with_keep_open(epc_path: str):
    """Demonstrate that object updates work correctly with ``keep_open=True``.

    Reads the first object, re-adds it via ``update_object``, then verifies
    it is still readable and that the object count did not change.

    :param epc_path: path to an existing EPC file.
    """
    print("\nDemonstrating file modifications with keep_open:")
    print("=" * 60)

    with EpcStreamReader(epc_path, keep_open=True) as reader:
        metadata_list = reader.list_object_metadata()
        original_count = len(metadata_list)
        print(f" Original object count: {original_count}")

        if not metadata_list:
            return

        # BUGFIX: guard against a missing identifier / unreadable object,
        # consistent with the `if meta.identifier` checks in the benchmarks.
        first_meta = metadata_list[0]
        if not first_meta.identifier:
            print(" First object has no identifier; skipping modification demo")
            return

        # Get first object
        first_obj = reader.get_object_by_identifier(first_meta.identifier)
        if first_obj is None:
            print(" Could not read first object; skipping modification demo")
            return
        print(f" Retrieved object: {first_meta.object_type}")

        # Update the object (re-add it)
        identifier = reader.update_object(first_obj)
        print(f" Updated object: {identifier}")

        # Verify we can still read it after update
        updated_obj = reader.get_object_by_identifier(identifier)
        assert updated_obj is not None, "Failed to read object after update"
        print(" ✓ Object successfully read after update")

        # Verify object count is the same
        new_metadata_list = reader.list_object_metadata()
        new_count = len(new_metadata_list)
        print(f" New object count: {new_count}")

        if new_count == original_count:
            print(" ✓ Object count unchanged (correct)")
        else:
            print(f" ✗ Object count changed: {original_count} -> {new_count}")


def demonstrate_proper_cleanup():
    """Demonstrate that a persistent (keep_open) ZIP file is properly closed.

    Exercises the three cleanup paths: explicit ``close()``, garbage
    collection via ``__del__``, and context-manager exit.
    """
    print("\nDemonstrating proper cleanup:")
    print("=" * 60)

    temp_path = "temp_test.epc"

    try:
        # Path 1: explicit close() on a temporary EPC file.
        explicit_reader = EpcStreamReader(temp_path, keep_open=True)
        print(" Created EpcStreamReader with keep_open=True")
        explicit_reader.close()
        print(" ✓ Manually closed reader")

        # Path 2: deletion triggers __del__ cleanup.
        gc_reader = EpcStreamReader(temp_path, keep_open=True)
        print(" Created second EpcStreamReader")
        del gc_reader
        print(" ✓ Reader deleted (automatic cleanup via __del__)")

        # Path 3: context-manager exit closes the reader.
        with EpcStreamReader(temp_path, keep_open=True) as _:
            print(" Created third EpcStreamReader in context manager")
        print(" ✓ Context manager exited (automatic cleanup)")

    finally:
        # Remove the temporary file regardless of outcome.
        temp_file = Path(temp_path)
        if temp_file.exists():
            temp_file.unlink()


def main():
    """Run all examples."""
    separator = "=" * 60
    print("EpcStreamReader keep_open Feature Demonstration")
    print(separator)

    # You'll need to provide a valid EPC file path
    epc_path = "wip/epc_test.epc"

    if not Path(epc_path).exists():
        # Without a real EPC file only the cleanup demo can run.
        print(f"\nError: EPC file not found: {epc_path}")
        print("Please provide a valid EPC file path in the script.")
        print("\nRunning cleanup demonstration only:")
        demonstrate_proper_cleanup()
        return

    try:
        # Run benchmarks
        num_ops = 20
        baseline_time = benchmark_without_keep_open(epc_path, num_ops)
        persistent_time = benchmark_with_keep_open(epc_path, num_ops)

        # Show comparison
        print("\n" + separator)
        print("Performance Comparison:")
        print(separator)
        if persistent_time > 0 and baseline_time > 0:
            speedup = baseline_time / persistent_time
            improvement = ((baseline_time - persistent_time) / baseline_time) * 100
            print(f" Speedup: {speedup:.2f}x")
            print(f" Improvement: {improvement:.1f}%")

            if speedup > 1.1:
                print("\n ✓ keep_open=True significantly improves performance!")
            elif speedup > 1.0:
                print("\n ✓ keep_open=True slightly improves performance")
            else:
                print("\n Note: For this workload, the difference is minimal")
                print(" (cache effects or small file)")

        # Demonstrate modifications
        demonstrate_file_modification_with_keep_open(epc_path)

        # Demonstrate cleanup
        demonstrate_proper_cleanup()

        print("\n" + separator)
        print("All demonstrations completed successfully!")
        print(separator)

    except Exception as e:
        print(f"\nError: {e}")
        import traceback

        traceback.print_exc()


if __name__ == "__main__":
main()
78 changes: 72 additions & 6 deletions energyml-utils/example/main.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
# Copyright (c) 2023-2024 Geosiris.
# SPDX-License-Identifier: Apache-2.0
import sys
import logging
from pathlib import Path
import re
from dataclasses import fields

from energyml.utils.constants import (
RGX_CONTENT_TYPE,
EpcExportVersion,
date_to_epoch,
epoch,
epoch_to_date,
gen_uuid,
get_domain_version_from_content_or_qualified_type,
parse_content_or_qualified_type,
parse_content_type,
)

src_path = Path(__file__).parent.parent / "src"
sys.path.insert(0, str(src_path))

from energyml.eml.v2_3.commonv2 import *
from energyml.eml.v2_3.commonv2 import Citation, DataObjectReference, ExistenceKind, Activity
from energyml.eml.v2_3.commonv2 import AbstractObject
from energyml.resqml.v2_0_1.resqmlv2 import DoubleHdf5Array
from energyml.resqml.v2_0_1.resqmlv2 import TriangulatedSetRepresentation as Tr20
Expand All @@ -22,17 +35,70 @@

# from src.energyml.utils.data.hdf import *
from energyml.utils.data.helper import get_projected_uom, is_z_reversed
from energyml.utils.epc import *
from energyml.utils.introspection import *
from energyml.utils.manager import *
from energyml.utils.serialization import *
from energyml.utils.epc import (
Epc,
EPCRelsRelationshipType,
as_dor,
create_energyml_object,
create_external_part_reference,
gen_energyml_object_path,
get_reverse_dor_list,
)
from energyml.utils.introspection import (
class_match_rgx,
copy_attributes,
get_class_attributes,
get_class_fields,
get_class_from_content_type,
get_class_from_name,
get_class_from_qualified_type,
get_class_methods,
get_content_type_from_class,
get_obj_pkg_pkgv_type_uuid_version,
get_obj_uri,
get_object_attribute,
get_obj_uuid,
get_object_attribute_rgx,
get_qualified_type_from_class,
is_abstract,
is_primitive,
random_value_from_class,
search_attribute_matching_name,
search_attribute_matching_name_with_path,
search_attribute_matching_type,
search_attribute_matching_type_with_path,
)
from energyml.utils.manager import (
# create_energyml_object,
# create_external_part_reference,
dict_energyml_modules,
get_class_pkg,
get_class_pkg_version,
get_classes_matching_name,
get_sub_classes,
list_energyml_modules,
)
from energyml.utils.serialization import (
read_energyml_xml_file,
read_energyml_xml_str,
serialize_json,
JSON_VERSION,
serialize_xml,
)
from energyml.utils.validation import (
patterns_validation,
dor_validation,
validate_epc,
correct_dor,
)
from energyml.utils.xml import *
from energyml.utils.xml import (
find_schema_version_in_element,
get_class_name_from_xml,
get_root_namespace,
get_root_type,
get_tree,
get_xml_encoding,
)
from energyml.utils.data.datasets_io import HDF5FileReader, get_path_in_external_with_path

fi_cit = Citation(
Expand Down
Loading