From a12b10f76e93e03b5cf085a255a618d566b37b43 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Fri, 6 Feb 2026 16:43:15 +0000 Subject: [PATCH 01/15] Add function to create interleaved VDS --- .../controllers/odin/generate_vds.py | 107 ++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 src/fastcs_eiger/controllers/odin/generate_vds.py diff --git a/src/fastcs_eiger/controllers/odin/generate_vds.py b/src/fastcs_eiger/controllers/odin/generate_vds.py new file mode 100644 index 0000000..376e5f3 --- /dev/null +++ b/src/fastcs_eiger/controllers/odin/generate_vds.py @@ -0,0 +1,107 @@ +import math +from pathlib import Path + +import h5py +import numpy as np + + +def create_interleave_vds( + path: str, + frame_count: int, + frames_per_block: int, + blocks_per_file: int, + frame_shape: tuple[int, int], +) -> None: + dtype = "float" + dataset_name = "data" + frames_per_file = min( + (frames_per_block * blocks_per_file if blocks_per_file else frame_count), + frame_count, + ) + n_files = math.ceil(frame_count / frames_per_file) + + file_name_prefix = Path(path).with_suffix("") + filepaths = [f"{file_name_prefix}_{str(i + 1).zfill(6)}.h5" for i in range(n_files)] + + min_frames_per_file = frames_per_file - frames_per_block + remainder = frame_count - (min_frames_per_file * n_files) + + v_layout = h5py.VirtualLayout( + shape=(frame_count, frame_shape[0], frame_shape[1]), + dtype=dtype, + ) + + for file_idx, filepath in enumerate(filepaths): + remaining = max(remainder - (frames_per_block * file_idx), 0) + frames_in_file = min_frames_per_file + min(frames_per_block, remaining) + block_remainder = frames_in_file % frames_per_block + + # MultiBlockSlice cannot contain partial blocks + blocked_frames = frames_in_file - block_remainder + + v_source = h5py.VirtualSource( + filepath, + name=dataset_name, + shape=(frames_in_file, frame_shape[0], frame_shape[1]), + dtype=dtype, + ) + + start = file_idx * frames_per_block + stride = n_files * frames_per_block + n_blocks = blocked_frames // frames_per_block + + if n_blocks: + source = v_source[:blocked_frames, :, :] + v_layout[ + h5py.MultiBlockSlice( + start=start, stride=stride, count=n_blocks, block=frames_per_block + ), + :, + :, + ] = source + + if block_remainder: + # Last few frames that don't fit into a block + source = v_source[blocked_frames:frames_in_file, :, :] + v_layout[frame_count - block_remainder : frame_count, :, :] = source + + with h5py.File(path, "w", libver="latest") as f: + f.create_virtual_dataset(dataset_name, v_layout, fillvalue=0) + + +def get_frame(n, shape=(10, 10)): + return np.full(shape, n) + + +def get_round_robin_arrays( + frames: int, block_size: int, n_files: int +) -> list[np.ndarray]: + arrays = [[] for _ in range(n_files)] + frame = 0 + while frame < frames: + data_array = arrays[frame // block_size % n_files] + for _ in range(block_size): + data_array.append(get_frame(frame)) + frame += 1 + if frame == frames: + break + return [np.array(array) for array in arrays] + + +def simulate_round_robin(path: str, frames: int, block_size: int, n_files: int): + file_name_prefix = Path(path).with_suffix("") + filepaths = [f"{file_name_prefix}_{str(i + 1).zfill(6)}.h5" for i in range(n_files)] + arrays = get_round_robin_arrays(frames, block_size, n_files) + for filepath, data_array in zip(filepaths, arrays, strict=True): + with h5py.File(filepath, "w") as f: + f.create_dataset("data", data=data_array) + + +path = "test.h5" +frames = 105 +block_size = 10 +n_files = 4 +blocks_per_file = 3 + +simulate_round_robin(path, frames, block_size, n_files) +create_interleave_vds(path, frames, block_size, blocks_per_file, (10, 10)) From f86b01bf391114c43dae17140fce801286b75cff Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Mon, 9 Feb 2026 15:06:35 +0000 Subject: [PATCH 02/15] Fix bug --- .../controllers/odin/generate_vds.py | 72 ++++++------------- 1 file changed, 23 insertions(+), 49 deletions(-) diff --git a/src/fastcs_eiger/controllers/odin/generate_vds.py b/src/fastcs_eiger/controllers/odin/generate_vds.py index 376e5f3..1c53f2e 100644 --- a/src/fastcs_eiger/controllers/odin/generate_vds.py +++ b/src/fastcs_eiger/controllers/odin/generate_vds.py @@ -2,7 +2,21 @@ from pathlib import Path import h5py -import numpy as np + + +def get_split_frame_numbers( + frame_count: int, frames_per_block: int, n_files: int +) -> list[int]: + frame_numbers_per_file = [] + n_blocks = math.ceil(frame_count / frames_per_block) + min_blocks_per_file = n_blocks // n_files + remainder = n_blocks - min_blocks_per_file * n_files + for i in range(n_files): + blocks = min_blocks_per_file + (i < remainder) + frame_numbers_per_file.append(blocks * frames_per_block) + overflow = sum(frame_numbers_per_file) - frame_count + frame_numbers_per_file[remainder - 1] -= overflow + return frame_numbers_per_file def create_interleave_vds( @@ -19,24 +33,22 @@ def create_interleave_vds( frame_count, ) n_files = math.ceil(frame_count / frames_per_file) - file_name_prefix = Path(path).with_suffix("") filepaths = [f"{file_name_prefix}_{str(i + 1).zfill(6)}.h5" for i in range(n_files)] - - min_frames_per_file = frames_per_file - frames_per_block - remainder = frame_count - (min_frames_per_file * n_files) + split_frame_numbers = get_split_frame_numbers( + frame_count, frames_per_block, n_files + ) v_layout = h5py.VirtualLayout( shape=(frame_count, frame_shape[0], frame_shape[1]), dtype=dtype, ) - for file_idx, filepath in enumerate(filepaths): - remaining = max(remainder - (frames_per_block * file_idx), 0) - frames_in_file = min_frames_per_file + min(frames_per_block, remaining) - block_remainder = frames_in_file % frames_per_block - + for file_idx, (filepath, frames_in_file) in enumerate( + zip(filepaths, split_frame_numbers, strict=True) + ): # MultiBlockSlice cannot contain partial blocks + block_remainder = frames_in_file % frames_per_block blocked_frames = frames_in_file - block_remainder v_source = h5py.VirtualSource( @@ -66,42 +78,4 @@ def create_interleave_vds( v_layout[frame_count - block_remainder : frame_count, :, :] = source with h5py.File(path, "w", libver="latest") as f: - f.create_virtual_dataset(dataset_name, v_layout, fillvalue=0) - - -def get_frame(n, shape=(10, 10)): - return np.full(shape, n) - - -def get_round_robin_arrays( - frames: int, block_size: int, n_files: int -) -> list[np.ndarray]: - arrays = [[] for _ in range(n_files)] - frame = 0 - while frame < frames: - data_array = arrays[frame // block_size % n_files] - for _ in range(block_size): - data_array.append(get_frame(frame)) - frame += 1 - if frame == frames: - break - return [np.array(array) for array in arrays] - - -def simulate_round_robin(path: str, frames: int, block_size: int, n_files: int): - file_name_prefix = Path(path).with_suffix("") - filepaths = [f"{file_name_prefix}_{str(i + 1).zfill(6)}.h5" for i in range(n_files)] - arrays = get_round_robin_arrays(frames, block_size, n_files) - for filepath, data_array in zip(filepaths, arrays, strict=True): - with h5py.File(filepath, "w") as f: - f.create_dataset("data", data=data_array) - - -path = "test.h5" -frames = 105 -block_size = 10 -n_files = 4 -blocks_per_file = 3 - -simulate_round_robin(path, frames, block_size, n_files) -create_interleave_vds(path, frames, block_size, blocks_per_file, (10, 10)) + f.create_virtual_dataset(dataset_name, v_layout) From 71def198177d35a9700d53e1c7a086e6cac193f7 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Mon, 9 Feb 2026 15:07:03 +0000 Subject: [PATCH 03/15] Add h5py to dependancies --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 7f0744c..44f99fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ dependencies = [ "numpy", "pillow", "typer", + "h5py", ] # Add project dependencies here, e.g. ["click", "numpy"] dynamic = ["version"] license.file = "LICENSE" From 8f99c7fbb49c3a54995d1e1a3010878c9c8c5b82 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Mon, 9 Feb 2026 15:07:15 +0000 Subject: [PATCH 04/15] Add tests --- tests/test_generate_vds.py | 94 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 tests/test_generate_vds.py diff --git a/tests/test_generate_vds.py b/tests/test_generate_vds.py new file mode 100644 index 0000000..9fb7bd7 --- /dev/null +++ b/tests/test_generate_vds.py @@ -0,0 +1,94 @@ +from unittest.mock import MagicMock, call, patch + +import h5py +import pytest + +from fastcs_eiger.controllers.odin.generate_vds import ( + create_interleave_vds, + get_split_frame_numbers, +) + + +@pytest.mark.parametrize( + "frame_count, frames_per_block, n_files, expected_split_frames", + [ + [10, 1, 3, [4, 3, 3]], + [10, 1, 10, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], + [10, 4, 10, [4, 4, 2, 0, 0, 0, 0, 0, 0, 0]], + [10, 1, 1, [10]], + [10, 4, 2, [6, 4]], + [10, 4, 3, [4, 4, 2]], + [10, 3, 3, [4, 3, 3]], + [105, 10, 3, [40, 35, 30]], + [1000000, 500, 4, [250000, 250000, 250000, 250000]], + ], +) +def test_get_split_frame_numbers_splits_frames_correctly( + frame_count: int, + frames_per_block: int, + n_files: int, + expected_split_frames: list[int], +): + split_frames_numbers = get_split_frame_numbers( + frame_count, frames_per_block, n_files + ) + assert split_frames_numbers == expected_split_frames + + +@pytest.mark.parametrize( + "frame_count, frames_per_block, blocks_per_file, expected_files", + [ + [100, 10, 5, {b"test_000001.h5", b"test_000002.h5"}], + [105, 10, 5, {b"test_000001.h5", b"test_000002.h5", b"test_000003.h5"}], + [25, 10, 1, {b"test_000001.h5", b"test_000002.h5", b"test_000003.h5"}], + [105, 10, 0, {b"test_000001.h5"}], + [1000, 2, 0, {b"test_000001.h5"}], + [ + 100, + 10, + 3, + { + b"test_000001.h5", + b"test_000002.h5", + b"test_000003.h5", + b"test_000004.h5", + }, + ], + ], +) +def test_create_interleave_vds_layout_contains_expected_files_and_has_expected_shape( + frame_count: int, + frames_per_block: int, + blocks_per_file: int, + expected_files: set[str], +): + mock_file = MagicMock() + mock_f = MagicMock() + mock_file.__enter__.return_value = mock_f + with patch( + "fastcs_eiger.controllers.odin.generate_vds.h5py.File", return_value=mock_file + ): + create_interleave_vds( + "test", frame_count, frames_per_block, blocks_per_file, (1, 1) + ) + layout: h5py.VirtualLayout = mock_f.create_virtual_dataset.call_args[0][1] + assert layout._src_filenames == expected_files + assert layout.shape == (frame_count, 1, 1) + + +@patch("fastcs_eiger.controllers.odin.generate_vds.h5py.VirtualSource") +@patch("fastcs_eiger.controllers.odin.generate_vds.h5py.VirtualLayout") +@patch("fastcs_eiger.controllers.odin.generate_vds.h5py.File") +def test_create_interleave_cds_makes_expected_source_layout_calls( + mock_file: MagicMock, mock_virtual_layoud: MagicMock, mock_virtual_source: MagicMock +): + create_interleave_vds("test.h5", 105, 10, 3, (10, 10)) + expected_split_frames = [30, 30, 25, 20] + assert len(mock_virtual_source.call_args_list) == 4 + for i, expected_frames in enumerate(expected_split_frames): + assert mock_virtual_source.call_args_list[i] == call( + f"test_00000{i + 1}.h5", + name="data", + shape=(expected_frames, 10, 10), + dtype="float", + ) From 334b0caf2b5cea6dded4ea54191635b59fae0886 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Mon, 9 Feb 2026 15:49:37 +0000 Subject: [PATCH 05/15] Add more tests --- tests/test_generate_vds.py | 107 +++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/tests/test_generate_vds.py b/tests/test_generate_vds.py index 9fb7bd7..0ee2aa8 100644 --- a/tests/test_generate_vds.py +++ b/tests/test_generate_vds.py @@ -1,6 +1,7 @@ from unittest.mock import MagicMock, call, patch import h5py +import numpy as np import pytest from fastcs_eiger.controllers.odin.generate_vds import ( @@ -92,3 +93,109 @@ def test_create_interleave_cds_makes_expected_source_layout_calls( shape=(expected_frames, 10, 10), dtype="float", ) + + +@pytest.fixture +def mock_round_robin_data() -> tuple[ + np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray +]: + file1_data = np.array( + [ + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[8, 8], [8, 8]], + [[9, 9], [9, 9]], + ] + ) + file2_data = np.array( + [ + [[2, 2], [2, 2]], + [[3, 3], [3, 3]], + [[10, 10], [10, 10]], + [[11, 11], [11, 11]], + ] + ) + file3_data = np.array( + [ + [[4, 4], [4, 4]], + [[5, 5], [5, 5]], + [[12, 12], [12, 12]], + ] + ) + file4_data = np.array( + [ + [[6, 6], [6, 6]], + [[7, 7], [7, 7]], + ] + ) + + expected_vds_data = np.array( + [ + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[2, 2], [2, 2]], + [[3, 3], [3, 3]], + [[4, 4], [4, 4]], + [[5, 5], [5, 5]], + [[6, 6], [6, 6]], + [[7, 7], [7, 7]], + [[8, 8], [8, 8]], + [[9, 9], [9, 9]], + [[10, 10], [10, 10]], + [[11, 11], [11, 11]], + [[12, 12], [12, 12]], + ] + ) + return file1_data, file2_data, file3_data, file4_data, expected_vds_data + + +def test_create_interleave_vds_before_files_written( + tmp_path, + mock_round_robin_data: tuple[ + np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray + ], +): + file1_data, file2_data, file3_data, file4_data, expected_vds_data = ( + mock_round_robin_data + ) + + path = tmp_path / "test.h5" + + create_interleave_vds(path, 13, 2, 2, (2, 2)) + + for i, data in enumerate((file1_data, file2_data, file3_data, file4_data)): + with h5py.File(tmp_path / f"test_00000{i + 1}.h5", "w") as f: + f.create_dataset(name="data", data=data) + + with h5py.File(path, "r") as f: + virtual_dataset = f.get("data") + assert isinstance(virtual_dataset, h5py.Dataset) + result = virtual_dataset[()] + + assert np.array_equal(result, expected_vds_data) + + +def test_create_interleave_vds_after_files_written( + tmp_path, + mock_round_robin_data: tuple[ + np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray + ], +): + file1_data, file2_data, file3_data, file4_data, expected_vds_data = ( + mock_round_robin_data + ) + + path = tmp_path / "test.h5" + + for i, data in enumerate((file1_data, file2_data, file3_data, file4_data)): + with h5py.File(tmp_path / f"test_00000{i + 1}.h5", "w") as f: + f.create_dataset(name="data", data=data) + + create_interleave_vds(path, 13, 2, 2, (2, 2)) + + with h5py.File(path, "r") as f: + virtual_dataset = f.get("data") + assert isinstance(virtual_dataset, h5py.Dataset) + result = virtual_dataset[()] + + assert np.array_equal(result, expected_vds_data) From 371e9a07465b79e0abea92dce0c92c9d7dab219a Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Mon, 9 Feb 2026 16:28:23 +0000 Subject: [PATCH 06/15] Clearer naming --- src/fastcs_eiger/controllers/odin/generate_vds.py | 8 +++----- tests/test_generate_vds.py | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/src/fastcs_eiger/controllers/odin/generate_vds.py b/src/fastcs_eiger/controllers/odin/generate_vds.py index 1c53f2e..4b5d989 100644 --- a/src/fastcs_eiger/controllers/odin/generate_vds.py +++ b/src/fastcs_eiger/controllers/odin/generate_vds.py @@ -4,7 +4,7 @@ import h5py -def get_split_frame_numbers( +def get_frames_per_file( frame_count: int, frames_per_block: int, n_files: int ) -> list[int]: frame_numbers_per_file = [] @@ -35,9 +35,7 @@ def create_interleave_vds( n_files = math.ceil(frame_count / frames_per_file) file_name_prefix = Path(path).with_suffix("") filepaths = [f"{file_name_prefix}_{str(i + 1).zfill(6)}.h5" for i in range(n_files)] - split_frame_numbers = get_split_frame_numbers( - frame_count, frames_per_block, n_files - ) + frame_count_per_file = get_frames_per_file(frame_count, frames_per_block, n_files) v_layout = h5py.VirtualLayout( shape=(frame_count, frame_shape[0], frame_shape[1]), @@ -45,7 +43,7 @@ def create_interleave_vds( ) for file_idx, (filepath, frames_in_file) in enumerate( - zip(filepaths, split_frame_numbers, strict=True) + zip(filepaths, frame_count_per_file, strict=True) ): # MultiBlockSlice cannot contain partial blocks block_remainder = frames_in_file % frames_per_block diff --git a/tests/test_generate_vds.py b/tests/test_generate_vds.py index 0ee2aa8..711f4aa 100644 --- a/tests/test_generate_vds.py +++ b/tests/test_generate_vds.py @@ -6,7 +6,7 @@ from fastcs_eiger.controllers.odin.generate_vds import ( create_interleave_vds, - get_split_frame_numbers, + get_frames_per_file, ) @@ -24,15 +24,13 @@ [1000000, 500, 4, [250000, 250000, 250000, 250000]], ], ) -def test_get_split_frame_numbers_splits_frames_correctly( +def test_get_frames_per_file_splits_frames_correctly( frame_count: int, frames_per_block: int, n_files: int, expected_split_frames: list[int], ): - split_frames_numbers = get_split_frame_numbers( - frame_count, frames_per_block, n_files - ) + split_frames_numbers = get_frames_per_file(frame_count, frames_per_block, n_files) assert split_frames_numbers == expected_split_frames From b5f26e89fd376e6356c6cd5524240350155b31f4 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Tue, 10 Feb 2026 13:36:42 +0000 Subject: [PATCH 07/15] Take datatype as argument --- src/fastcs_eiger/controllers/odin/generate_vds.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/fastcs_eiger/controllers/odin/generate_vds.py b/src/fastcs_eiger/controllers/odin/generate_vds.py index 4b5d989..bff43cc 100644 --- a/src/fastcs_eiger/controllers/odin/generate_vds.py +++ b/src/fastcs_eiger/controllers/odin/generate_vds.py @@ -25,8 +25,8 @@ def create_interleave_vds( frames_per_block: int, blocks_per_file: int, frame_shape: tuple[int, int], + dtype: str = "float", ) -> None: - dtype = "float" dataset_name = "data" frames_per_file = min( (frames_per_block * blocks_per_file if blocks_per_file else frame_count), @@ -45,10 +45,6 @@ def create_interleave_vds( for file_idx, (filepath, frames_in_file) in enumerate( zip(filepaths, frame_count_per_file, strict=True) ): - # MultiBlockSlice cannot contain partial blocks - block_remainder = frames_in_file % frames_per_block - blocked_frames = frames_in_file - block_remainder - v_source = h5py.VirtualSource( filepath, name=dataset_name, @@ -56,6 +52,10 @@ def create_interleave_vds( dtype=dtype, ) + # MultiBlockSlice cannot contain partial blocks + block_remainder = frames_in_file % frames_per_block + blocked_frames = frames_in_file - block_remainder + start = file_idx * frames_per_block stride = n_files * frames_per_block n_blocks = blocked_frames // frames_per_block From b10c76395dd31571abeafa04f4772db0a58b19eb Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Tue, 10 Feb 2026 13:37:04 +0000 Subject: [PATCH 08/15] Add attributes --- .../controllers/eiger_detector_controller.py | 3 +++ .../controllers/odin/eiger_odin_controller.py | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/fastcs_eiger/controllers/eiger_detector_controller.py b/src/fastcs_eiger/controllers/eiger_detector_controller.py index cf96cc4..d897d68 100644 --- a/src/fastcs_eiger/controllers/eiger_detector_controller.py +++ b/src/fastcs_eiger/controllers/eiger_detector_controller.py @@ -25,6 +25,9 @@ class EigerDetectorController(EigerSubsystemController): bit_depth_image: AttrR[int] compression: AttrRW[str] trigger_mode: AttrR[str] + nimages: AttrR[int] + x_pixels_in_detector: AttrR[int] + y_pixels_in_detector: AttrR[int] @detector_command async def initialize(self): diff --git a/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py b/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py index 8021363..3b0a315 100644 --- a/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py +++ b/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py @@ -1,15 +1,20 @@ import asyncio +from fastcs.attributes import AttrRW from fastcs.connections import IPConnectionSettings +from fastcs.datatypes import Bool from fastcs.methods import command from fastcs_eiger.controllers.eiger_controller import EigerController +from fastcs_eiger.controllers.odin.generate_vds import create_interleave_vds from fastcs_eiger.controllers.odin.odin_controller import OdinController class EigerOdinController(EigerController): """Eiger controller with Odin sub controller""" + enable_vds_creation = AttrRW(Bool()) + def __init__( self, detector_connection_settings: IPConnectionSettings, @@ -58,3 +63,16 @@ async def start_writing(self): await self.OD.writing.wait_for_value(True, timeout=1) except TimeoutError as e: raise TimeoutError("File writers failed to start") from e + + if self.enable_vds_creation.get(): + create_interleave_vds( + path=self.OD.file_path.get(), + frame_count=self.detector.nimages.get(), + frames_per_block=self.OD.block_size.get(), + blocks_per_file=self.OD.FP.process_blocks_per_file.get(), + frame_shape=( + self.detector.x_pixels_in_detector.get(), + self.detector.y_pixels_in_detector.get(), + ), + dtype=self.OD.FP.data_datatype.get(), + ) From 8557d032dfff34dfa32e4e91c6e7d4099c40877d Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Tue, 10 Feb 2026 16:33:09 +0000 Subject: [PATCH 09/15] Account for number of data writers --- .../controllers/odin/eiger_odin_controller.py | 4 +- .../controllers/odin/generate_vds.py | 89 +++++++------ tests/test_generate_vds.py | 117 ++++++++++++------ 3 files changed, 132 insertions(+), 78 deletions(-) diff --git a/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py b/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py index 3b0a315..0de13b2 100644 --- a/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py +++ b/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py @@ -1,4 +1,5 @@ import asyncio +from pathlib import Path from fastcs.attributes import AttrRW from fastcs.connections import IPConnectionSettings @@ -66,7 +67,8 @@ async def start_writing(self): if self.enable_vds_creation.get(): create_interleave_vds( - path=self.OD.file_path.get(), + path=Path(self.OD.file_path.get()), + prefix=self.OD.file_prefix.get(), frame_count=self.detector.nimages.get(), frames_per_block=self.OD.block_size.get(), blocks_per_file=self.OD.FP.process_blocks_per_file.get(), diff --git a/src/fastcs_eiger/controllers/odin/generate_vds.py b/src/fastcs_eiger/controllers/odin/generate_vds.py index bff43cc..6c53235 100644 --- a/src/fastcs_eiger/controllers/odin/generate_vds.py +++ b/src/fastcs_eiger/controllers/odin/generate_vds.py @@ -4,7 +4,7 @@ import h5py -def get_frames_per_file( +def get_frames_per_file_writer( frame_count: int, frames_per_block: int, n_files: int ) -> list[int]: frame_numbers_per_file = [] @@ -20,60 +20,73 @@ def get_frames_per_file( def create_interleave_vds( - path: str, + path: Path, + prefix: str, frame_count: int, frames_per_block: int, blocks_per_file: int, frame_shape: tuple[int, int], dtype: str = "float", + n_file_writers: int = 4, ) -> None: dataset_name = "data" - frames_per_file = min( - (frames_per_block * blocks_per_file if blocks_per_file else frame_count), - frame_count, + max_frames_per_file = ( + frames_per_block * blocks_per_file if blocks_per_file else frame_count + ) + + frame_count_per_file_writer = get_frames_per_file_writer( + frame_count, frames_per_block, n_file_writers ) - n_files = math.ceil(frame_count / frames_per_file) - file_name_prefix = Path(path).with_suffix("") - filepaths = [f"{file_name_prefix}_{str(i + 1).zfill(6)}.h5" for i in range(n_files)] - frame_count_per_file = get_frames_per_file(frame_count, frames_per_block, n_files) v_layout = h5py.VirtualLayout( shape=(frame_count, frame_shape[0], frame_shape[1]), dtype=dtype, ) - for file_idx, (filepath, frames_in_file) in enumerate( - zip(filepaths, frame_count_per_file, strict=True) - ): - v_source = h5py.VirtualSource( - filepath, - name=dataset_name, - shape=(frames_in_file, frame_shape[0], frame_shape[1]), - dtype=dtype, - ) + for file_writer_idx, n_frames in enumerate(frame_count_per_file_writer): + n_files = math.ceil(n_frames / max_frames_per_file) + + for file_idx in range(n_files): + frames_in_file = min( + max_frames_per_file, n_frames - (max_frames_per_file * file_idx) + ) + file_number = 1 + file_writer_idx + file_idx * n_file_writers + + v_source = h5py.VirtualSource( + f"{path / prefix}_{str(file_number).zfill(6)}.h5", + name=dataset_name, + shape=(frames_in_file, frame_shape[0], frame_shape[1]), + dtype=dtype, + ) - # MultiBlockSlice cannot contain partial blocks - block_remainder = frames_in_file % frames_per_block - blocked_frames = frames_in_file - block_remainder + # MultiBlockSlice cannot contain partial blocks + remainder_frames = frames_in_file % frames_per_block + full_block_frames = frames_in_file - remainder_frames - start = file_idx * frames_per_block - stride = n_files * frames_per_block - n_blocks = blocked_frames // frames_per_block + start = ( + file_writer_idx * frames_per_block + + max_frames_per_file * n_file_writers * file_idx + ) + n_blocks = full_block_frames // frames_per_block - if n_blocks: - source = v_source[:blocked_frames, :, :] - v_layout[ - h5py.MultiBlockSlice( - start=start, stride=stride, count=n_blocks, block=frames_per_block - ), - :, - :, - ] = source + if n_blocks: + stride = n_file_writers * frames_per_block + source = v_source[:full_block_frames, :, :] + v_layout[ + h5py.MultiBlockSlice( + start=start, + stride=stride, + count=n_blocks, + block=frames_per_block, + ), + :, + :, + ] = source - if block_remainder: - # Last few frames that don't fit into a block - source = v_source[blocked_frames:frames_in_file, :, :] - v_layout[frame_count - block_remainder : frame_count, :, :] = source + if remainder_frames: + # Last few frames that don't fit into a block + source = v_source[full_block_frames:frames_in_file, :, :] + v_layout[frame_count - remainder_frames : frame_count, :, :] = source - with h5py.File(path, "w", libver="latest") as f: + with h5py.File(f"{path / prefix}_vds.h5", "w", libver="latest") as f: f.create_virtual_dataset(dataset_name, v_layout) diff --git a/tests/test_generate_vds.py b/tests/test_generate_vds.py index 711f4aa..e196591 100644 --- a/tests/test_generate_vds.py +++ b/tests/test_generate_vds.py @@ -1,3 +1,4 @@ +from pathlib import Path from unittest.mock import MagicMock, call, patch import h5py @@ -6,12 +7,12 @@ from fastcs_eiger.controllers.odin.generate_vds import ( create_interleave_vds, - get_frames_per_file, + get_frames_per_file_writer, ) @pytest.mark.parametrize( - "frame_count, frames_per_block, n_files, expected_split_frames", + "frame_count, frames_per_block, n_file_writers, expected_split_frames", [ [10, 1, 3, [4, 3, 3]], [10, 1, 10, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], @@ -20,32 +21,47 @@ [10, 4, 2, [6, 4]], [10, 4, 3, [4, 4, 2]], [10, 3, 3, [4, 3, 3]], - [105, 10, 3, [40, 35, 30]], + [105, 10, 4, [30, 30, 25, 20]], [1000000, 500, 4, [250000, 250000, 250000, 250000]], ], ) def test_get_frames_per_file_splits_frames_correctly( frame_count: int, frames_per_block: int, - n_files: int, + n_file_writers: int, expected_split_frames: list[int], ): - split_frames_numbers = get_frames_per_file(frame_count, frames_per_block, n_files) + split_frames_numbers = get_frames_per_file_writer( + frame_count, frames_per_block, n_file_writers + ) assert split_frames_numbers == expected_split_frames @pytest.mark.parametrize( - "frame_count, frames_per_block, blocks_per_file, expected_files", + "frame_count, frames_per_block, blocks_per_file, n_file_writers, expected_files", [ - [100, 10, 5, {b"test_000001.h5", b"test_000002.h5"}], - [105, 10, 5, {b"test_000001.h5", b"test_000002.h5", b"test_000003.h5"}], - [25, 10, 1, {b"test_000001.h5", b"test_000002.h5", b"test_000003.h5"}], - [105, 10, 0, {b"test_000001.h5"}], - [1000, 2, 0, {b"test_000001.h5"}], + [100, 10, 5, 1, {b"test_000001.h5", b"test_000002.h5"}], + [105, 10, 5, 1, {b"test_000001.h5", b"test_000002.h5", b"test_000003.h5"}], + [ + 25, + 5, + 1, + 4, + { + b"test_000001.h5", + b"test_000002.h5", + b"test_000003.h5", + b"test_000004.h5", + b"test_000005.h5", + }, + ], + [105, 10, 0, 1, {b"test_000001.h5"}], + [1000, 2, 0, 1, {b"test_000001.h5"}], [ 100, 10, 3, + 2, { b"test_000001.h5", b"test_000002.h5", @@ -59,6 +75,7 @@ def test_create_interleave_vds_layout_contains_expected_files_and_has_expected_s frame_count: int, frames_per_block: int, blocks_per_file: int, + n_file_writers: int, expected_files: set[str], ): mock_file = MagicMock() @@ -68,7 +85,13 @@ def test_create_interleave_vds_layout_contains_expected_files_and_has_expected_s "fastcs_eiger.controllers.odin.generate_vds.h5py.File", return_value=mock_file ): create_interleave_vds( - "test", frame_count, frames_per_block, blocks_per_file, (1, 1) + Path(), + "test", + frame_count, + frames_per_block, + blocks_per_file, + (1, 1), + n_file_writers=n_file_writers, ) layout: h5py.VirtualLayout = mock_f.create_virtual_dataset.call_args[0][1] assert layout._src_filenames == expected_files @@ -81,7 +104,7 @@ def test_create_interleave_vds_layout_contains_expected_files_and_has_expected_s def test_create_interleave_cds_makes_expected_source_layout_calls( mock_file: MagicMock, mock_virtual_layoud: MagicMock, mock_virtual_source: MagicMock ): - create_interleave_vds("test.h5", 105, 10, 3, (10, 10)) + create_interleave_vds(Path(), "test", 105, 10, 3, (10, 10)) expected_split_frames = [30, 30, 25, 20] assert len(mock_virtual_source.call_args_list) == 4 for i, expected_frames in enumerate(expected_split_frames): @@ -94,9 +117,8 @@ def test_create_interleave_cds_makes_expected_source_layout_calls( @pytest.fixture -def mock_round_robin_data() -> tuple[ - np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray -]: +def mock_round_robin_data() -> tuple[list[np.ndarray], np.ndarray]: + """Assuming 4 file writers, blocks of 2 frames, and 2 blocks per file.""" file1_data = np.array( [ [[0, 0], [0, 0]], @@ -118,12 +140,26 @@ def mock_round_robin_data() -> tuple[ [[4, 4], [4, 4]], [[5, 5], [5, 5]], [[12, 12], [12, 12]], + [[13, 13], [13, 13]], ] ) file4_data = np.array( [ [[6, 6], [6, 6]], [[7, 7], [7, 7]], + [[14, 14], [14, 14]], + [[15, 15], [15, 15]], + ] + ) + file5_data = np.array( + [ + [[16, 16], [16, 16]], + [[17, 17], [17, 17]], + ] + ) + file6_data = np.array( + [ + [[18, 18], [18, 18]], ] ) @@ -142,30 +178,38 @@ def mock_round_robin_data() -> tuple[ [[10, 10], [10, 10]], [[11, 11], [11, 11]], [[12, 12], [12, 12]], + [[13, 13], [13, 13]], + [[14, 14], [14, 14]], + [[15, 15], [15, 15]], + [[16, 16], [16, 16]], + [[17, 17], [17, 17]], + [[18, 18], [18, 18]], ] ) - return file1_data, file2_data, file3_data, file4_data, expected_vds_data + return [ + file1_data, + file2_data, + file3_data, + file4_data, + file5_data, + file6_data, + ], expected_vds_data def test_create_interleave_vds_before_files_written( tmp_path, - mock_round_robin_data: tuple[ - np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray - ], + mock_round_robin_data: tuple[list[np.ndarray], np.ndarray], ): - file1_data, file2_data, file3_data, file4_data, expected_vds_data = ( - mock_round_robin_data - ) - - path = tmp_path / "test.h5" + acquired_data, expected_vds_data = mock_round_robin_data + prefix = "test" - create_interleave_vds(path, 13, 2, 2, (2, 2)) + create_interleave_vds(tmp_path, prefix, 19, 2, 2, (2, 2)) - for i, data in enumerate((file1_data, file2_data, file3_data, file4_data)): + for i, data in enumerate(acquired_data): with h5py.File(tmp_path / f"test_00000{i + 1}.h5", "w") as f: f.create_dataset(name="data", data=data) - with h5py.File(path, "r") as f: + with h5py.File(f"{tmp_path / prefix}_vds.h5", "r") as f: virtual_dataset = f.get("data") assert isinstance(virtual_dataset, h5py.Dataset) result = virtual_dataset[()] @@ -175,23 +219,18 @@ def test_create_interleave_vds_before_files_written( def test_create_interleave_vds_after_files_written( tmp_path, - mock_round_robin_data: tuple[ - np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray - ], + mock_round_robin_data: tuple[list[np.ndarray], np.ndarray], ): - file1_data, file2_data, file3_data, file4_data, expected_vds_data = ( - mock_round_robin_data - ) - - path = tmp_path / "test.h5" + acquired_data, expected_vds_data = mock_round_robin_data + prefix = "test" - for i, data in enumerate((file1_data, file2_data, file3_data, file4_data)): + for i, data in enumerate(acquired_data): with h5py.File(tmp_path / f"test_00000{i + 1}.h5", "w") as f: f.create_dataset(name="data", data=data) - create_interleave_vds(path, 13, 2, 2, (2, 2)) + create_interleave_vds(tmp_path, prefix, 19, 2, 2, (2, 2)) - with h5py.File(path, "r") as f: + with h5py.File(f"{tmp_path / prefix}_vds.h5", "r") as f: virtual_dataset = f.get("data") assert isinstance(virtual_dataset, h5py.Dataset) result = virtual_dataset[()] From 96f45116220d83547f4382b30b929da2ab83ceb4 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Tue, 10 Feb 2026 17:17:26 +0000 Subject: [PATCH 10/15] Improve tests --- tests/test_generate_vds.py | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/tests/test_generate_vds.py b/tests/test_generate_vds.py index e196591..0ba3bf7 100644 --- a/tests/test_generate_vds.py +++ b/tests/test_generate_vds.py @@ -1,5 +1,5 @@ from pathlib import Path -from unittest.mock import MagicMock, call, patch +from unittest.mock import MagicMock, patch import h5py import numpy as np @@ -56,7 +56,7 @@ def test_get_frames_per_file_splits_frames_correctly( }, ], [105, 10, 0, 1, {b"test_000001.h5"}], - [1000, 2, 0, 1, {b"test_000001.h5"}], + [1000, 2, 0, 2, {b"test_000001.h5", b"test_000002.h5"}], [ 100, 10, @@ -101,14 +101,31 @@ def test_create_interleave_vds_layout_contains_expected_files_and_has_expected_s @patch("fastcs_eiger.controllers.odin.generate_vds.h5py.VirtualSource") @patch("fastcs_eiger.controllers.odin.generate_vds.h5py.VirtualLayout") @patch("fastcs_eiger.controllers.odin.generate_vds.h5py.File") +@pytest.mark.parametrize( + "frame_count, frames_per_block, blocks_per_file, expected_frames_per_file", + [ + [155, 10, 3, [30, 30, 30, 30, 10, 10, 10, 5]], + [145, 10, 3, [30, 30, 30, 30, 10, 10, 5]], + [145, 10, 0, [40, 40, 35, 30]], + [145, 1, 0, [37, 36, 36, 36]], + [20, 30, 0, [20]], + ], +) def test_create_interleave_cds_makes_expected_source_layout_calls( - mock_file: MagicMock, mock_virtual_layoud: MagicMock, mock_virtual_source: MagicMock + mock_file: MagicMock, + mock_virtual_layoud: MagicMock, + mock_virtual_source: MagicMock, + frame_count: int, + frames_per_block: int, + blocks_per_file: int, + expected_frames_per_file: list[int], ): - create_interleave_vds(Path(), "test", 105, 10, 3, (10, 10)) - expected_split_frames = [30, 30, 25, 20] - assert len(mock_virtual_source.call_args_list) == 4 - for i, expected_frames in enumerate(expected_split_frames): - assert mock_virtual_source.call_args_list[i] == call( + create_interleave_vds( + Path(), "test", frame_count, frames_per_block, blocks_per_file, (10, 10) + ) + assert len(mock_virtual_source.call_args_list) == len(expected_frames_per_file) + for i, expected_frames in enumerate(expected_frames_per_file): + mock_virtual_source.assert_any_call( f"test_00000{i + 1}.h5", name="data", shape=(expected_frames, 10, 10), @@ -118,7 +135,9 @@ def test_create_interleave_cds_makes_expected_source_layout_calls( @pytest.fixture def mock_round_robin_data() -> tuple[list[np.ndarray], np.ndarray]: - """Assuming 4 file writers, blocks of 2 frames, and 2 blocks per file.""" + """Assuming 4 file writers, 19 frames in blocks of 2 frames, and 2 blocks per file. + Frames are 2 x 2. The values in each frame represent the order they would have been + written in, and therefore the order the VDS should splice them together in.""" file1_data = np.array( [ [[0, 0], [0, 0]], From 779720047a4a3743ccbd50b5ec07ed5eafa20524 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Tue, 10 Feb 2026 17:42:35 +0000 Subject: [PATCH 11/15] Typo --- tests/test_generate_vds.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_generate_vds.py b/tests/test_generate_vds.py index 0ba3bf7..6b623a5 100644 --- a/tests/test_generate_vds.py +++ b/tests/test_generate_vds.py @@ -25,7 +25,7 @@ [1000000, 500, 4, [250000, 250000, 250000, 250000]], ], ) -def test_get_frames_per_file_splits_frames_correctly( +def test_get_frames_per_file_writer_splits_frames_correctly( frame_count: int, frames_per_block: int, n_file_writers: int, From 2cc760a9b0855a09f66bd8bbdaac170a52b5b2cb Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Tue, 10 Feb 2026 18:01:43 +0000 Subject: [PATCH 12/15] Fix naming --- src/fastcs_eiger/controllers/odin/generate_vds.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/fastcs_eiger/controllers/odin/generate_vds.py b/src/fastcs_eiger/controllers/odin/generate_vds.py index 6c53235..d48dd91 100644 --- a/src/fastcs_eiger/controllers/odin/generate_vds.py +++ b/src/fastcs_eiger/controllers/odin/generate_vds.py @@ -5,13 +5,13 @@ def get_frames_per_file_writer( - frame_count: int, frames_per_block: int, n_files: int + frame_count: int, frames_per_block: int, n_file_writers: int ) -> list[int]: frame_numbers_per_file = [] n_blocks = math.ceil(frame_count / frames_per_block) - min_blocks_per_file = n_blocks // n_files - remainder = n_blocks - min_blocks_per_file * n_files - for i in range(n_files): + min_blocks_per_file = n_blocks // n_file_writers + remainder = n_blocks - min_blocks_per_file * n_file_writers + for i in range(n_file_writers): blocks = min_blocks_per_file + (i < remainder) frame_numbers_per_file.append(blocks * frames_per_block) overflow = sum(frame_numbers_per_file) - frame_count From 15bba9da12310719b3fb4311af0bfec7bf7ffce9 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Thu, 12 Feb 2026 14:34:27 +0000 Subject: [PATCH 13/15] Add datasets as a parameter --- .../controllers/odin/generate_vds.py | 132 ++++++++++-------- tests/test_generate_vds.py | 31 +++- 2 files changed, 96 insertions(+), 67 deletions(-) diff --git a/src/fastcs_eiger/controllers/odin/generate_vds.py b/src/fastcs_eiger/controllers/odin/generate_vds.py index d48dd91..bcad4f7 100644 --- a/src/fastcs_eiger/controllers/odin/generate_vds.py +++ b/src/fastcs_eiger/controllers/odin/generate_vds.py @@ -4,24 +4,54 @@ import h5py -def get_frames_per_file_writer( +def _get_frames_per_file_writer( frame_count: int, frames_per_block: int, n_file_writers: int ) -> list[int]: - frame_numbers_per_file = [] + frames_per_file_writer = [] n_blocks = math.ceil(frame_count / frames_per_block) min_blocks_per_file = n_blocks // n_file_writers remainder = n_blocks - min_blocks_per_file * n_file_writers for i in range(n_file_writers): blocks = min_blocks_per_file + (i < remainder) - frame_numbers_per_file.append(blocks * frames_per_block) - overflow = sum(frame_numbers_per_file) - frame_count - frame_numbers_per_file[remainder - 1] -= overflow - return frame_numbers_per_file + frames_per_file_writer.append(blocks * frames_per_block) + overflow = sum(frames_per_file_writer) - frame_count + frames_per_file_writer[remainder - 1] -= overflow + return frames_per_file_writer + + +def _calculate_frame_distribution( + frame_count, frames_per_block, blocks_per_file, n_file_writers +) -> dict[int, dict]: + + frame_distribution: dict[int, dict] = {} + + max_frames_per_file = ( + frames_per_block * blocks_per_file if blocks_per_file else frame_count + ) + # total frames written before one of the file writers has to start a new file + frames_before_new_file = n_file_writers * max_frames_per_file + frames_per_file_writer = _get_frames_per_file_writer( + frame_count, frames_per_block, n_file_writers + ) + for file_writer_idx, n_frames in enumerate(frames_per_file_writer): + n_files = math.ceil(n_frames / max_frames_per_file) + for i in range(n_files): + file_idx = file_writer_idx + i * n_file_writers + frames = min(n_frames - i * max_frames_per_file, max_frames_per_file) + frame_distribution[file_idx + 1] = { + "frames": frames, + "blocks": frames // frames_per_block, + "remainder_frames": frames % frames_per_block, + "start": frames_per_block * file_writer_idx + + file_idx // n_file_writers * frames_before_new_file, + } + return frame_distribution def create_interleave_vds( path: Path, prefix: str, + datasets: list[str], frame_count: int, frames_per_block: int, blocks_per_file: int, @@ -29,64 +59,42 @@ def create_interleave_vds( dtype: str = "float", n_file_writers: int = 4, ) -> None: - dataset_name = "data" - max_frames_per_file = ( - frames_per_block * blocks_per_file if blocks_per_file else frame_count - ) - - frame_count_per_file_writer = get_frames_per_file_writer( - frame_count, frames_per_block, n_file_writers - ) - - v_layout = h5py.VirtualLayout( - shape=(frame_count, frame_shape[0], frame_shape[1]), - dtype=dtype, + frame_distribution = _calculate_frame_distribution( + frame_count, frames_per_block, blocks_per_file, n_file_writers ) + stride = n_file_writers * frames_per_block - for file_writer_idx, n_frames in enumerate(frame_count_per_file_writer): - n_files = math.ceil(n_frames / max_frames_per_file) - - for file_idx in range(n_files): - frames_in_file = min( - max_frames_per_file, n_frames - (max_frames_per_file * file_idx) - ) - file_number = 1 + file_writer_idx + file_idx * n_file_writers - - v_source = h5py.VirtualSource( - f"{path / prefix}_{str(file_number).zfill(6)}.h5", - name=dataset_name, - shape=(frames_in_file, frame_shape[0], frame_shape[1]), + with h5py.File(f"{path / prefix}_vds.h5", "w", libver="latest") as f: + for dataset_name in datasets: + v_layout = h5py.VirtualLayout( + shape=(frame_count, frame_shape[0], frame_shape[1]), dtype=dtype, ) + for file_number, frame_info in frame_distribution.items(): + full_block_frames = frame_info["blocks"] * frames_per_block + v_source = h5py.VirtualSource( + f"{path / prefix}_{str(file_number).zfill(6)}.h5", + name=dataset_name, + shape=(frame_info["frames"], frame_shape[0], frame_shape[1]), + dtype=dtype, + ) + if frame_info["blocks"]: + source = v_source[:full_block_frames, :, :] + v_layout[ + h5py.MultiBlockSlice( + start=frame_info["start"], + stride=stride, + count=frame_info["blocks"], + block=frames_per_block, + ), + :, + :, + ] = source + if frame_info["remainder_frames"]: + # Last few frames that don't fit into a block + source = v_source[full_block_frames : frame_info["frames"], :, :] + v_layout[ + frame_count - frame_info["remainder_frames"] : frame_count, :, : + ] = source - # MultiBlockSlice cannot contain partial blocks - remainder_frames = frames_in_file % frames_per_block - full_block_frames = frames_in_file - remainder_frames - - start = ( - file_writer_idx * frames_per_block - + max_frames_per_file * n_file_writers * file_idx - ) - n_blocks = full_block_frames // frames_per_block - - if n_blocks: - stride = n_file_writers * frames_per_block - source = v_source[:full_block_frames, :, :] - v_layout[ - h5py.MultiBlockSlice( - start=start, - stride=stride, - count=n_blocks, - block=frames_per_block, - ), - :, - :, - ] = source - - if remainder_frames: - # Last few frames that don't fit into a block - source = v_source[full_block_frames:frames_in_file, :, :] - v_layout[frame_count - remainder_frames : frame_count, :, :] = source - - with h5py.File(f"{path / prefix}_vds.h5", "w", libver="latest") as f: - f.create_virtual_dataset(dataset_name, v_layout) + f.create_virtual_dataset(dataset_name, v_layout) diff --git a/tests/test_generate_vds.py b/tests/test_generate_vds.py index 6b623a5..d41ed49 100644 --- a/tests/test_generate_vds.py +++ b/tests/test_generate_vds.py @@ -6,8 +6,9 @@ import pytest from fastcs_eiger.controllers.odin.generate_vds import ( + _calculate_frame_distribution, + _get_frames_per_file_writer, create_interleave_vds, - get_frames_per_file_writer, ) @@ -31,7 +32,7 @@ def test_get_frames_per_file_writer_splits_frames_correctly( n_file_writers: int, expected_split_frames: list[int], ): - split_frames_numbers = get_frames_per_file_writer( + split_frames_numbers = _get_frames_per_file_writer( frame_count, frames_per_block, n_file_writers ) assert split_frames_numbers == expected_split_frames @@ -87,6 +88,7 @@ def test_create_interleave_vds_layout_contains_expected_files_and_has_expected_s create_interleave_vds( Path(), "test", + ["data"], frame_count, frames_per_block, blocks_per_file, @@ -121,7 +123,13 @@ def test_create_interleave_cds_makes_expected_source_layout_calls( expected_frames_per_file: list[int], ): create_interleave_vds( - Path(), "test", frame_count, frames_per_block, blocks_per_file, (10, 10) + Path(), + "test", + ["data"], + frame_count, + frames_per_block, + blocks_per_file, + (10, 10), ) assert len(mock_virtual_source.call_args_list) == len(expected_frames_per_file) for i, expected_frames in enumerate(expected_frames_per_file): @@ -133,6 +141,19 @@ def test_create_interleave_cds_makes_expected_source_layout_calls( ) +def test_calculate_frame_distribution(): + expected = { + 1: {"start": 0, "frames": 4, "blocks": 2, "remainder_frames": 0}, + 2: {"start": 2, "frames": 4, "blocks": 2, "remainder_frames": 0}, + 3: {"start": 4, "frames": 4, "blocks": 2, "remainder_frames": 0}, + 4: {"start": 6, "frames": 4, "blocks": 2, "remainder_frames": 0}, + 5: {"start": 16, "frames": 2, "blocks": 1, "remainder_frames": 0}, + 6: {"start": 18, "frames": 1, "blocks": 0, "remainder_frames": 1}, + } + result = _calculate_frame_distribution(19, 2, 2, 4) + assert result == expected + + @pytest.fixture def mock_round_robin_data() -> tuple[list[np.ndarray], np.ndarray]: """Assuming 4 file writers, 19 frames in blocks of 2 frames, and 2 blocks per file. @@ -222,7 +243,7 @@ def test_create_interleave_vds_before_files_written( acquired_data, expected_vds_data = mock_round_robin_data prefix = "test" - create_interleave_vds(tmp_path, prefix, 19, 2, 2, (2, 2)) + create_interleave_vds(tmp_path, prefix, ["data"], 19, 2, 2, (2, 2)) for i, data in enumerate(acquired_data): with h5py.File(tmp_path / f"test_00000{i + 1}.h5", "w") as f: @@ -247,7 +268,7 @@ def test_create_interleave_vds_after_files_written( with h5py.File(tmp_path / f"test_00000{i + 1}.h5", "w") as f: f.create_dataset(name="data", data=data) - create_interleave_vds(tmp_path, prefix, 19, 2, 2, (2, 2)) + create_interleave_vds(tmp_path, prefix, ["data"], 19, 2, 2, (2, 2)) with h5py.File(f"{tmp_path / prefix}_vds.h5", "r") as f: virtual_dataset = f.get("data") From 3a1ec0f1c8cdea2ce469e53f3f50da1db0995a87 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Thu, 12 Feb 2026 16:36:50 +0000 Subject: [PATCH 14/15] Clean up and add more tests --- .../controllers/odin/generate_vds.py | 56 +++++--- tests/test_generate_vds.py | 120 +++++++++++++++--- 2 files changed, 137 insertions(+), 39 deletions(-) diff --git a/src/fastcs_eiger/controllers/odin/generate_vds.py b/src/fastcs_eiger/controllers/odin/generate_vds.py index bcad4f7..6bd0677 100644 --- a/src/fastcs_eiger/controllers/odin/generate_vds.py +++ b/src/fastcs_eiger/controllers/odin/generate_vds.py @@ -1,19 +1,37 @@ import math +from dataclasses import dataclass from pathlib import Path import h5py +@dataclass +class FileFrames: + frames: int + start: int + frames_per_block: int + + @property + def blocks(self): + return self.frames // self.frames_per_block + + @property + def remainder_frames(self): + return self.frames % self.frames_per_block + + def _get_frames_per_file_writer( frame_count: int, frames_per_block: int, n_file_writers: int ) -> list[int]: - frames_per_file_writer = [] n_blocks = math.ceil(frame_count / frames_per_block) min_blocks_per_file = n_blocks // n_file_writers remainder = n_blocks - min_blocks_per_file * n_file_writers + + frames_per_file_writer = [] for i in range(n_file_writers): blocks = min_blocks_per_file + (i < remainder) frames_per_file_writer.append(blocks * frames_per_block) + overflow = sum(frames_per_file_writer) - frame_count frames_per_file_writer[remainder - 1] -= overflow return frames_per_file_writer @@ -21,9 +39,9 @@ def _get_frames_per_file_writer( def _calculate_frame_distribution( frame_count, frames_per_block, blocks_per_file, n_file_writers -) -> dict[int, dict]: +) -> dict[int, FileFrames]: - frame_distribution: dict[int, dict] = {} + frame_distribution: dict[int, FileFrames] = {} max_frames_per_file = ( frames_per_block * blocks_per_file if blocks_per_file else frame_count @@ -37,14 +55,14 @@ def _calculate_frame_distribution( n_files = math.ceil(n_frames / max_frames_per_file) for i in range(n_files): file_idx = file_writer_idx + i * n_file_writers - frames = min(n_frames - i * max_frames_per_file, max_frames_per_file) - frame_distribution[file_idx + 1] = { - "frames": frames, - "blocks": frames // frames_per_block, - "remainder_frames": frames % frames_per_block, - "start": frames_per_block * file_writer_idx + + frame_distribution[file_idx + 1] = FileFrames( + frames=min(n_frames - i * max_frames_per_file, max_frames_per_file), + frames_per_block=frames_per_block, + start=frames_per_block * file_writer_idx + file_idx // n_file_writers * frames_before_new_file, - } + ) + return frame_distribution @@ -70,31 +88,31 @@ def create_interleave_vds( shape=(frame_count, frame_shape[0], frame_shape[1]), dtype=dtype, ) - for file_number, frame_info in frame_distribution.items(): - full_block_frames = frame_info["blocks"] * frames_per_block + for file_number, file_frames in frame_distribution.items(): + full_block_frames = file_frames.blocks * frames_per_block v_source = h5py.VirtualSource( f"{path / prefix}_{str(file_number).zfill(6)}.h5", name=dataset_name, - shape=(frame_info["frames"], frame_shape[0], frame_shape[1]), + shape=(file_frames.frames, frame_shape[0], frame_shape[1]), dtype=dtype, ) - if frame_info["blocks"]: + if file_frames.blocks: source = v_source[:full_block_frames, :, :] v_layout[ h5py.MultiBlockSlice( - start=frame_info["start"], + start=file_frames.start, stride=stride, - count=frame_info["blocks"], + count=file_frames.blocks, block=frames_per_block, ), :, :, ] = source - if frame_info["remainder_frames"]: + if file_frames.remainder_frames: # Last few frames that don't fit into a block - source = v_source[full_block_frames : frame_info["frames"], :, :] + source = v_source[full_block_frames : file_frames.frames, :, :] v_layout[ - frame_count - frame_info["remainder_frames"] : frame_count, :, : + frame_count - file_frames.remainder_frames : frame_count, :, : ] = source f.create_virtual_dataset(dataset_name, v_layout) diff --git a/tests/test_generate_vds.py b/tests/test_generate_vds.py index d41ed49..afb7b6f 100644 --- a/tests/test_generate_vds.py +++ b/tests/test_generate_vds.py @@ -6,6 +6,7 @@ import pytest from fastcs_eiger.controllers.odin.generate_vds import ( + FileFrames, _calculate_frame_distribution, _get_frames_per_file_writer, create_interleave_vds, @@ -122,36 +123,94 @@ def test_create_interleave_cds_makes_expected_source_layout_calls( blocks_per_file: int, expected_frames_per_file: list[int], ): + datasets = ["data", "sets"] create_interleave_vds( Path(), "test", - ["data"], + datasets, frame_count, frames_per_block, blocks_per_file, (10, 10), ) - assert len(mock_virtual_source.call_args_list) == len(expected_frames_per_file) - for i, expected_frames in enumerate(expected_frames_per_file): - mock_virtual_source.assert_any_call( - f"test_00000{i + 1}.h5", - name="data", - shape=(expected_frames, 10, 10), - dtype="float", - ) + assert len(mock_virtual_source.call_args_list) == len( + expected_frames_per_file + ) * len(datasets) + for dataset_name in datasets: + for i, expected_frames in enumerate(expected_frames_per_file): + mock_virtual_source.assert_any_call( + f"test_00000{i + 1}.h5", + name=dataset_name, + shape=(expected_frames, 10, 10), + dtype="float", + ) -def test_calculate_frame_distribution(): - expected = { - 1: {"start": 0, "frames": 4, "blocks": 2, "remainder_frames": 0}, - 2: {"start": 2, "frames": 4, "blocks": 2, "remainder_frames": 0}, - 3: {"start": 4, "frames": 4, "blocks": 2, "remainder_frames": 0}, - 4: {"start": 6, "frames": 4, "blocks": 2, "remainder_frames": 0}, - 5: {"start": 16, "frames": 2, "blocks": 1, "remainder_frames": 0}, - 6: {"start": 18, "frames": 1, "blocks": 0, "remainder_frames": 1}, - } - result = _calculate_frame_distribution(19, 2, 2, 4) - assert result == expected +@pytest.mark.parametrize( + "frames, frames_per_block, expected_blocks, expected_remainder", + [[6, 3, 2, 0], [8, 3, 2, 2], [6, 7, 0, 6], [6, 6, 1, 0]], +) +def test_file_frames_dataclass_calculates_blocks_and_remainder_correctly( + frames: int, frames_per_block: int, expected_blocks, expected_remainder +): + file_frames = FileFrames(frames=frames, frames_per_block=frames_per_block, start=0) + assert file_frames.blocks == expected_blocks + assert file_frames.remainder_frames == expected_remainder + + +@pytest.mark.parametrize( + "frame_count, frames_per_block, blocks_per_file, n_writers, expected_distribution", + [ + [ + 10, + 3, + 2, + 1, + { + 1: FileFrames(frames=6, frames_per_block=3, start=0), + 2: FileFrames(frames=4, frames_per_block=3, start=6), + }, + ], + [10, 10, 0, 4, {1: FileFrames(frames=10, frames_per_block=10, start=0)}], + [ + 985, + 10, + 0, + 4, + { + 1: FileFrames(frames=250, frames_per_block=10, start=0), + 2: FileFrames(frames=250, frames_per_block=10, start=10), + 3: FileFrames(frames=245, frames_per_block=10, start=20), + 4: FileFrames(frames=240, frames_per_block=10, start=30), + }, + ], + [ + 19, + 2, + 2, + 4, + { + 1: FileFrames(frames=4, frames_per_block=2, start=0), + 2: FileFrames(frames=4, frames_per_block=2, start=2), + 3: FileFrames(frames=4, frames_per_block=2, start=4), + 4: FileFrames(frames=4, frames_per_block=2, start=6), + 5: FileFrames(frames=2, frames_per_block=2, start=16), + 6: FileFrames(frames=1, frames_per_block=2, start=18), + }, + ], + ], +) +def test_calculate_frame_distribution( + frame_count: int, + frames_per_block: int, + blocks_per_file: int, + n_writers: int, + expected_distribution: dict[int, FileFrames], +): + result = _calculate_frame_distribution( + frame_count, frames_per_block, blocks_per_file, n_writers + ) + assert result == expected_distribution @pytest.fixture @@ -276,3 +335,24 @@ def test_create_interleave_vds_after_files_written( result = virtual_dataset[()] assert np.array_equal(result, expected_vds_data) + + +def test_create_interleave_vds_creates_virtual_dataset_for_all_datasets( + tmp_path, + mock_round_robin_data: tuple[list[np.ndarray], np.ndarray], +): + acquired_data, expected_vds_data = mock_round_robin_data + prefix = "test" + + for i, data in enumerate(acquired_data): + with h5py.File(tmp_path / f"test_00000{i + 1}.h5", "w") as f: + f.create_dataset(name="one", data=np.zeros(data.shape)) + f.create_dataset(name="two", data=data * 10) + f.create_dataset(name="three", data=data * 100) + + create_interleave_vds(tmp_path, prefix, ["one", "two", "three"], 19, 2, 2, (2, 2)) + + with h5py.File(f"{tmp_path / prefix}_vds.h5", "r") as f: + assert np.array_equal(f.get("one")[()], np.zeros(expected_vds_data.shape)) # type: ignore + assert np.array_equal(f.get("two")[()], expected_vds_data * 10) # type: ignore + assert np.array_equal(f.get("three")[()], expected_vds_data * 100) # type: ignore From e41097c50d90d8d18a9c21896530c6840dc38842 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Thu, 12 Feb 2026 17:10:11 +0000 Subject: [PATCH 15/15] Use correct attributes --- src/fastcs_eiger/controllers/eiger_detector_controller.py | 3 --- src/fastcs_eiger/controllers/odin/eiger_odin_controller.py | 7 ++++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/fastcs_eiger/controllers/eiger_detector_controller.py b/src/fastcs_eiger/controllers/eiger_detector_controller.py index d897d68..cf96cc4 100644 --- a/src/fastcs_eiger/controllers/eiger_detector_controller.py +++ b/src/fastcs_eiger/controllers/eiger_detector_controller.py @@ -25,9 +25,6 @@ class EigerDetectorController(EigerSubsystemController): bit_depth_image: AttrR[int] compression: AttrRW[str] trigger_mode: AttrR[str] - nimages: AttrR[int] - x_pixels_in_detector: AttrR[int] - y_pixels_in_detector: AttrR[int] @detector_command async def initialize(self): diff --git a/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py b/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py index 0de13b2..9670fc0 100644 --- a/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py +++ b/src/fastcs_eiger/controllers/odin/eiger_odin_controller.py @@ -69,12 +69,13 @@ async def start_writing(self): create_interleave_vds( path=Path(self.OD.file_path.get()), prefix=self.OD.file_prefix.get(), - frame_count=self.detector.nimages.get(), + datasets=["data"], # NEED TO GET THIS FROM SOMEWHERE + frame_count=self.OD.FP.frames.get(), frames_per_block=self.OD.block_size.get(), blocks_per_file=self.OD.FP.process_blocks_per_file.get(), frame_shape=( - self.detector.x_pixels_in_detector.get(), - self.detector.y_pixels_in_detector.get(), + self.OD.FP.data_dims_1.get(), + self.OD.FP.data_dims_0.get(), ), dtype=self.OD.FP.data_datatype.get(), )