Skip to content
2 changes: 1 addition & 1 deletion energyml-utils/.flake8
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[flake8]
# Ignore specific error codes (comma-separated list)
ignore = E501, E722, W503, F403, E203, E202
ignore = E501, E722, W503, F403, E203, E202, E402

# Max line length (default is 79, can be changed)
max-line-length = 120
Expand Down
8 changes: 8 additions & 0 deletions energyml-utils/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ sample/
gen*/
manip*
*.epc
*.h5
*.off
*.obj
*.log
Expand All @@ -54,6 +55,13 @@ manip*

*.xml
*.json
docs/*.md

# DATA
*.obj
*.geojson
*.vtk
*.stl


# WIP
Expand Down
209 changes: 209 additions & 0 deletions energyml-utils/example/epc_stream_keep_open_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
#!/usr/bin/env python
# Copyright (c) 2023-2024 Geosiris.
# SPDX-License-Identifier: Apache-2.0
"""
Example demonstrating the keep_open feature of EpcStreamReader.

This example shows how using keep_open=True improves performance when
performing multiple operations on an EPC file by keeping the ZIP file
open instead of reopening it for each operation.
"""

import time
import sys
from pathlib import Path

# Add src directory to path
src_path = Path(__file__).parent.parent / "src"
sys.path.insert(0, str(src_path))

from energyml.utils.epc_stream import EpcStreamReader


def benchmark_without_keep_open(epc_path: str, num_operations: int = 10):
    """Benchmark repeated object reads with ``keep_open=False``.

    With ``keep_open=False`` the reader reopens the underlying ZIP archive
    for every read, which is the slow path this example contrasts against.

    :param epc_path: path to an existing EPC file.
    :param num_operations: requested number of read operations; capped by
        the number of objects present in the file.
    :return: elapsed wall-clock time in seconds, or 0 if the file is empty.
    """
    print(f"\nBenchmark WITHOUT keep_open ({num_operations} operations):")
    print("=" * 60)

    start = time.time()

    # Create reader without keep_open
    with EpcStreamReader(epc_path, keep_open=False, cache_size=5) as reader:
        metadata_list = reader.list_object_metadata()

        if not metadata_list:
            print(" No objects in EPC file")
            return 0

        # Cap at the number of available objects so the average below
        # reflects work actually performed.
        actual_operations = min(num_operations, len(metadata_list))
        for i in range(actual_operations):
            meta = metadata_list[i % len(metadata_list)]
            if meta.identifier:
                _ = reader.get_object_by_identifier(meta.identifier)
                if i == 0:
                    print(f" First object: {meta.object_type}")

    elapsed = time.time() - start
    print(f" Time: {elapsed:.4f}s")
    # BUGFIX: divide by the operations actually run, not the requested
    # count — the original under-reported the average for small files.
    print(f" Avg per operation: {elapsed / actual_operations:.4f}s")

    return elapsed


def benchmark_with_keep_open(epc_path: str, num_operations: int = 10):
    """Benchmark repeated object reads with ``keep_open=True``.

    With ``keep_open=True`` the reader keeps the underlying ZIP archive
    open across reads instead of reopening it per operation.

    :param epc_path: path to an existing EPC file.
    :param num_operations: requested number of read operations; capped by
        the number of objects present in the file.
    :return: elapsed wall-clock time in seconds, or 0 if the file is empty.
    """
    print(f"\nBenchmark WITH keep_open ({num_operations} operations):")
    print("=" * 60)

    start = time.time()

    # Create reader with keep_open
    with EpcStreamReader(epc_path, keep_open=True, cache_size=5) as reader:
        metadata_list = reader.list_object_metadata()

        if not metadata_list:
            print(" No objects in EPC file")
            return 0

        # Cap at the number of available objects so the average below
        # reflects work actually performed.
        actual_operations = min(num_operations, len(metadata_list))
        for i in range(actual_operations):
            meta = metadata_list[i % len(metadata_list)]
            if meta.identifier:
                _ = reader.get_object_by_identifier(meta.identifier)
                if i == 0:
                    print(f" First object: {meta.object_type}")

    elapsed = time.time() - start
    print(f" Time: {elapsed:.4f}s")
    # BUGFIX: divide by the operations actually run, not the requested
    # count — the original under-reported the average for small files.
    print(f" Avg per operation: {elapsed / actual_operations:.4f}s")

    return elapsed


def demonstrate_file_modification_with_keep_open(epc_path: str):
    """Demonstrate that object updates work correctly with ``keep_open=True``.

    Reads the first object, re-adds it via ``update_object``, then verifies
    it is still readable and that the object count did not change.

    :param epc_path: path to an existing EPC file.
    """
    print("\nDemonstrating file modifications with keep_open:")
    print("=" * 60)

    with EpcStreamReader(epc_path, keep_open=True) as reader:
        metadata_list = reader.list_object_metadata()
        original_count = len(metadata_list)
        print(f" Original object count: {original_count}")

        if not metadata_list:
            return

        # BUGFIX: guard against a missing identifier / unreadable object,
        # consistent with the `if meta.identifier` checks in the benchmarks.
        first_meta = metadata_list[0]
        if not first_meta.identifier:
            print(" First object has no identifier; skipping modification demo")
            return

        # Get first object
        first_obj = reader.get_object_by_identifier(first_meta.identifier)
        if first_obj is None:
            print(" Could not read first object; skipping modification demo")
            return
        print(f" Retrieved object: {first_meta.object_type}")

        # Update the object (re-add it)
        identifier = reader.update_object(first_obj)
        print(f" Updated object: {identifier}")

        # Verify we can still read it after update
        updated_obj = reader.get_object_by_identifier(identifier)
        assert updated_obj is not None, "Failed to read object after update"
        print(" ✓ Object successfully read after update")

        # Verify object count is the same
        new_metadata_list = reader.list_object_metadata()
        new_count = len(new_metadata_list)
        print(f" New object count: {new_count}")

        if new_count == original_count:
            print(" ✓ Object count unchanged (correct)")
        else:
            print(f" ✗ Object count changed: {original_count} -> {new_count}")


def demonstrate_proper_cleanup():
    """Demonstrate that a persistent (keep_open) ZIP file is properly closed.

    Exercises the three cleanup paths: explicit ``close()``, garbage
    collection via ``__del__``, and context-manager exit.
    """
    print("\nDemonstrating proper cleanup:")
    print("=" * 60)

    temp_path = "temp_test.epc"

    try:
        # Path 1: explicit close() on a temporary EPC file.
        explicit_reader = EpcStreamReader(temp_path, keep_open=True)
        print(" Created EpcStreamReader with keep_open=True")
        explicit_reader.close()
        print(" ✓ Manually closed reader")

        # Path 2: deletion triggers __del__ cleanup.
        gc_reader = EpcStreamReader(temp_path, keep_open=True)
        print(" Created second EpcStreamReader")
        del gc_reader
        print(" ✓ Reader deleted (automatic cleanup via __del__)")

        # Path 3: context-manager exit closes the reader.
        with EpcStreamReader(temp_path, keep_open=True) as _:
            print(" Created third EpcStreamReader in context manager")
        print(" ✓ Context manager exited (automatic cleanup)")

    finally:
        # Remove the temporary file regardless of outcome.
        temp_file = Path(temp_path)
        if temp_file.exists():
            temp_file.unlink()


def main():
    """Run all examples."""
    separator = "=" * 60
    print("EpcStreamReader keep_open Feature Demonstration")
    print(separator)

    # You'll need to provide a valid EPC file path
    epc_path = "wip/epc_test.epc"

    if not Path(epc_path).exists():
        # Without a real EPC file only the cleanup demo can run.
        print(f"\nError: EPC file not found: {epc_path}")
        print("Please provide a valid EPC file path in the script.")
        print("\nRunning cleanup demonstration only:")
        demonstrate_proper_cleanup()
        return

    try:
        # Run benchmarks
        num_ops = 20
        baseline_time = benchmark_without_keep_open(epc_path, num_ops)
        persistent_time = benchmark_with_keep_open(epc_path, num_ops)

        # Show comparison
        print("\n" + separator)
        print("Performance Comparison:")
        print(separator)
        if persistent_time > 0 and baseline_time > 0:
            speedup = baseline_time / persistent_time
            improvement = ((baseline_time - persistent_time) / baseline_time) * 100
            print(f" Speedup: {speedup:.2f}x")
            print(f" Improvement: {improvement:.1f}%")

            if speedup > 1.1:
                print("\n ✓ keep_open=True significantly improves performance!")
            elif speedup > 1.0:
                print("\n ✓ keep_open=True slightly improves performance")
            else:
                print("\n Note: For this workload, the difference is minimal")
                print(" (cache effects or small file)")

        # Demonstrate modifications
        demonstrate_file_modification_with_keep_open(epc_path)

        # Demonstrate cleanup
        demonstrate_proper_cleanup()

        print("\n" + separator)
        print("All demonstrations completed successfully!")
        print(separator)

    except Exception as e:
        print(f"\nError: {e}")
        import traceback

        traceback.print_exc()


if __name__ == "__main__":
main()
78 changes: 72 additions & 6 deletions energyml-utils/example/main.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
# Copyright (c) 2023-2024 Geosiris.
# SPDX-License-Identifier: Apache-2.0
import sys
import logging
from pathlib import Path
import re
from dataclasses import fields

from energyml.utils.constants import (
RGX_CONTENT_TYPE,
EpcExportVersion,
date_to_epoch,
epoch,
epoch_to_date,
gen_uuid,
get_domain_version_from_content_or_qualified_type,
parse_content_or_qualified_type,
parse_content_type,
)

src_path = Path(__file__).parent.parent / "src"
sys.path.insert(0, str(src_path))

from energyml.eml.v2_3.commonv2 import *
from energyml.eml.v2_3.commonv2 import Citation, DataObjectReference, ExistenceKind, Activity
from energyml.eml.v2_3.commonv2 import AbstractObject
from energyml.resqml.v2_0_1.resqmlv2 import DoubleHdf5Array
from energyml.resqml.v2_0_1.resqmlv2 import TriangulatedSetRepresentation as Tr20
Expand All @@ -22,17 +35,70 @@

# from src.energyml.utils.data.hdf import *
from energyml.utils.data.helper import get_projected_uom, is_z_reversed
from energyml.utils.epc import *
from energyml.utils.introspection import *
from energyml.utils.manager import *
from energyml.utils.serialization import *
from energyml.utils.epc import (
Epc,
EPCRelsRelationshipType,
as_dor,
create_energyml_object,
create_external_part_reference,
gen_energyml_object_path,
get_reverse_dor_list,
)
from energyml.utils.introspection import (
class_match_rgx,
copy_attributes,
get_class_attributes,
get_class_fields,
get_class_from_content_type,
get_class_from_name,
get_class_from_qualified_type,
get_class_methods,
get_content_type_from_class,
get_obj_pkg_pkgv_type_uuid_version,
get_obj_uri,
get_object_attribute,
get_obj_uuid,
get_object_attribute_rgx,
get_qualified_type_from_class,
is_abstract,
is_primitive,
random_value_from_class,
search_attribute_matching_name,
search_attribute_matching_name_with_path,
search_attribute_matching_type,
search_attribute_matching_type_with_path,
)
from energyml.utils.manager import (
# create_energyml_object,
# create_external_part_reference,
dict_energyml_modules,
get_class_pkg,
get_class_pkg_version,
get_classes_matching_name,
get_sub_classes,
list_energyml_modules,
)
from energyml.utils.serialization import (
read_energyml_xml_file,
read_energyml_xml_str,
serialize_json,
JSON_VERSION,
serialize_xml,
)
from energyml.utils.validation import (
patterns_validation,
dor_validation,
validate_epc,
correct_dor,
)
from energyml.utils.xml import *
from energyml.utils.xml import (
find_schema_version_in_element,
get_class_name_from_xml,
get_root_namespace,
get_root_type,
get_tree,
get_xml_encoding,
)
from energyml.utils.data.datasets_io import HDF5FileReader, get_path_in_external_with_path

fi_cit = Citation(
Expand Down
Loading