diff --git a/.github/conda/bld.bat b/.github/conda/bld.bat deleted file mode 100644 index 89481145..00000000 --- a/.github/conda/bld.bat +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -$PYTHON setup.py install --single-version-externally-managed --record=record.txt diff --git a/.github/conda/build.sh b/.github/conda/build.sh deleted file mode 100644 index 89481145..00000000 --- a/.github/conda/build.sh +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -$PYTHON setup.py install --single-version-externally-managed --record=record.txt diff --git a/.github/conda/conda_build_config.yaml b/.github/conda/conda_build_config.yaml deleted file mode 100644 index 49777cb0..00000000 --- a/.github/conda/conda_build_config.yaml +++ /dev/null @@ -1,7 +0,0 @@ -python: - - 3.13 - - 3.12 - - 3.11 - - 3.10 - - diff --git a/.github/conda/meta.yaml b/.github/conda/meta.yaml deleted file mode 100644 index b78f32b9..00000000 --- a/.github/conda/meta.yaml +++ /dev/null @@ -1,56 +0,0 @@ -{% set data = load_setup_py_data() %} - -package: - name: ctlearn - version: {{ data.get('version') }} -source: - path: ../.. - -build: - #noarch: generic - number: 0 - -requirements: - build: - - python #==3.12 - - numpy >=1.20 - - setuptools - - astropy - - scipy - - jupyter - - ctapipe ==0.20.0 - - pytables >=3.8 - - pandas - host: - - python #==3.12 - - numpy >=1.20 - - astropy - - setuptools - - scipy - - jupyter - - ctapipe ==0.20.0 - - pytables >=3.8 - - pandas - run: - - python #==3.12 - - numpy >=1.20 - - jupyter - - setuptools - - astropy - - scipy - - ctapipe ==0.20.0 - - pytables >=3.8 - - pandas - - test: - imports: - - ctlearn -about: - home: https://github.com/ctlearn-project/ctlearn/ - license: BSD3-Clause - license_file: LICENSE - summary: Deep Learning for IACT Event Reconstruction. -extra: - recipe-maintainers: - - TjarkMiener - - nietootein diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml index c2c8c558..da00acbf 100644 --- a/.github/workflows/python-package-conda.yml +++ b/.github/workflows/python-package-conda.yml @@ -1,4 +1,3 @@ - name: CI on: @@ -15,39 +14,61 @@ jobs: strategy: matrix: os: [ubuntu-22.04] - pyv: [ '3.10','3.11', '3.12', '3.13'] - max-parallel: 5 + python-version: ['3.12', '3.13', '3.14'] + dl1dh-version: ['latest', 'nightly'] + max-parallel: 6 runs-on: ${{ matrix.os }} + continue-on-error: ${{ matrix.dl1dh-version == 'nightly' || matrix.python-version == '3.14' }} steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.pyv }} - run: | - # Install Miniconda - wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - bash miniconda.sh -b -p $HOME/miniconda - echo "$HOME/miniconda/bin" >> $GITHUB_PATH - source $HOME/miniconda/bin/activate - # Install Mamba via conda (since we don't have mamba yet) - $HOME/miniconda/bin/conda config --add channels conda-forge - $HOME/miniconda/bin/conda install -y mamba=2.0.8 - mamba install -y python=${{ matrix.pyv }} - - name: Add MKL_THREADING_LAYER variable - run: echo "MKL_THREADING_LAYER=GNU" >> $GITHUB_ENV - - name: Install dependencies with Mamba - run: | - source $HOME/miniconda/bin/activate - mamba env update --file environment.yml --name base - - name: Lint with flake8 - run: | - source $HOME/miniconda/bin/activate - mamba install flake8 - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Install with pip - run: | - pip install -e . 
- - name: Test with pytest - run: | - source $HOME/miniconda/bin/activate - mamba install pytest - pytest + - uses: actions/checkout@v4 + + - name: Set up Miniconda + run: | + wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh + bash miniconda.sh -b -p $HOME/miniconda + echo "$HOME/miniconda/bin" >> $GITHUB_PATH + source $HOME/miniconda/etc/profile.d/conda.sh + conda config --add channels conda-forge + conda install -y mamba + + - name: Create Python environment + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + mamba create -y -n ctlearn python==${{ matrix.python-version }} -c conda-forge + conda activate ctlearn + + - name: Install dependencies + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + conda activate ctlearn + sudo apt-get update + sudo apt-get install -y git + pip install --upgrade pip + pip install pylint pylint-exit anybadge eventio pytest flake8 + if [ "${{ matrix.dl1dh-version }}" = "nightly" ]; then + pip install git+https://github.com/cta-observatory/dl1-data-handler.git + else + pip install dl1-data-handler + fi + + - name: Add MKL_THREADING_LAYER variable + run: echo "MKL_THREADING_LAYER=GNU" >> $GITHUB_ENV + + - name: Lint with flake8 + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + conda activate ctlearn + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + + - name: Install package with pip + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + conda activate ctlearn + pip install -e . + + - name: Run pytest + run: | + source $HOME/miniconda/etc/profile.d/conda.sh + conda activate ctlearn + pytest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 646613b1..984895f4 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,13 +2,17 @@ name: Release CD on: release: - types: [published] + types: [published] workflow_dispatch: + pull_request: + paths: + - '.github/workflows/**' + jobs: pypi-publish: name: Publish release to PyPI environment: - name: pypi + name: pypi url: https://pypi.org/project/ctlearn/ permissions: id-token: write @@ -20,64 +24,29 @@ jobs: strategy: matrix: os: [ubuntu-22.04] - pyv: ['3.10'] + pyv: ['3.12'] max-parallel: 5 runs-on: ${{ matrix.os }} + steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.pyv }} - run: | - conda install -y python=${{ matrix.pyv }} - - - name: Add conda to system path - run: | - #$CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - - - name: Install dependencies - run: | - conda env update --file environment.yml --name base - - - name: Build package - run: | - python --version - pip install -U build - python -m build - - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + - uses: actions/checkout@v4 + - name: Set up micromamba (Python ${{ matrix.pyv }}) + uses: mamba-org/setup-micromamba@v1 + with: + environment-name: ctlearn + create-args: >- + python=${{ matrix.pyv }} + pip + cache-environment: true - condapublish: - strategy: - matrix: - os: [ubuntu-22.04] - pyv: ["3.10"] - max-parallel: 5 - runs-on: ${{ matrix.os }} - permissions: - id-token: write - contents: write - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Set up Python ${{ matrix.pyv }} - run: | - conda install -y python=${{ matrix.pyv }} - - - name: Add conda to system path - run: | 
- # $CONDA is an environment variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - - - name: Install dependencies - run: | - conda env update --file environment.yml --name base - sudo apt-get install python3-numpy - - - name: publish-to-conda - uses: fcakyon/conda-publish-action@v1.3 - with: - subdir: '.github/conda' - anacondatoken: ${{ secrets.ANACONDA_TOKEN }} + - name: Build package + shell: micromamba-shell {0} + run: | + python --version + pip install -U build + python -m build + + - name: Publish package distributions to PyPI + if: github.event_name == 'release' + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 9d5aecee..71fb5123 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -6,7 +6,7 @@ sphinx: build: os: ubuntu-22.04 tools: - python: "3.10" + python: "3.12" python: install: diff --git a/Dockerfile b/Dockerfile index 72881b4a..4a08b151 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Stage 1: Build the ctlearn wheel using a standard Python image -FROM python:3.11 AS builder +FROM python:3.12 AS builder # Install git (needed for setuptools_scm during build) and build tool RUN apt-get update \ @@ -18,9 +18,7 @@ COPY ./.git ./.git/ RUN python -m build --wheel # Stage 2: Create the final runtime image BASED ON NVIDIA's TF image -# TODO what version to use ? after 24.?? TF 2.14 is not found in the container -FROM nvcr.io/nvidia/tensorflow:24.01-tf2-py3 - +FROM nvcr.io/nvidia/tensorflow:25.02-tf2-py3 # Copy only the built wheel from the builder stage's dist directory COPY --from=builder /repo/dist /tmp/dist diff --git a/README.rst b/README.rst index 81556e43..c5c4853f 100644 --- a/README.rst +++ b/README.rst @@ -28,24 +28,24 @@ CTLearn is a package under active development to run deep learning models to ana Installation for users ---------------------- -Download and install `Anaconda `_\ , or, for a minimal installation, `Miniconda `_. -The following command will set up a conda virtual environment, add the -necessary package channels, and install CTLearn specified version and its dependencies: +Installation +------------ + +First, create and activate a fresh conda environment: .. code-block:: bash - CTLEARN_VER=0.10.3 - wget https://raw.githubusercontent.com/ctlearn-project/ctlearn/v$CTLEARN_VER/environment.yml - conda env create -n [ENVIRONMENT_NAME] -f environment.yml - conda activate [ENVIRONMENT_NAME] - pip install ctlearn==$CTLEARN_VER - ctlearn -h + mamba create -n ctlearn -c conda-forge python==3.12 llvmlite + conda activate ctlearn + +The latest version of this package can be installed via pip: + +.. code-block:: bash -This should automatically install all dependencies (NOTE: this may take some time, as by default MKL is included as a dependency of NumPy and it is very large). + pip install ctlearn -See the documentation for further information like `installation instructions for developers `_, `package usage `_, and `dependencies `_ among other topics. +See the documentation for further information like `installation instructions for the IT-cluster `_, `installation instructions for developers `_, `package usage `_, and `dependencies `_ among other topics. Citing this software -------------------- diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index d912ae18..cdfcdb18 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -1,47 +1,171 @@ """ common pytest fixtures for tests in ctlearn.
-Credits to ctapipe for the original code. """ +from pathlib import Path + +import numpy as np import pytest +import shutil +from astropy import units as u +from astropy.table import Column, Table +from traitlets.config.loader import Config from ctapipe.core import run_tool +from ctapipe.io import write_table from ctapipe.utils import get_dataset_path +from ctlearn.tools import TrainCTLearnModel +from ctlearn.utils import get_lst1_subarray_description + + +@pytest.fixture(scope="session") +def gamma_simtel_path(): + return get_dataset_path("gamma_test_large.simtel.gz") + @pytest.fixture(scope="session") -def prod5_gamma_simtel_path(): - return get_dataset_path("gamma_prod5.simtel.zst") +def proton_simtel_path(): + return get_dataset_path( + "proton_20deg_0deg_run4___cta-prod5-paranal_desert-2147m-Paranal-dark-100evts.simtel.zst" + ) + @pytest.fixture(scope="session") def dl1_tmp_path(tmp_path_factory): """Temporary directory for global dl1 test data""" return tmp_path_factory.mktemp("dl1_") + @pytest.fixture(scope="session") def r1_tmp_path(tmp_path_factory): """Temporary directory for global r1 test data""" return tmp_path_factory.mktemp("r1_") + +def _create_mock_lst1_dl1_file( + output_path: str | Path, + n_events: int = 4, + random_seed: int | None = 0, +) -> Path: + """Write a minimal DL1 file with fake data compatible with the LST1PredictionTool.""" + + rng = np.random.default_rng(random_seed) + output_path = Path(output_path) + + subarray = get_lst1_subarray_description() + tel_id = 1 + n_pixels = subarray.tel[tel_id].camera.geometry.n_pixels + + obs_id = 1 + event_ids = np.arange(1, n_events + 1, dtype=np.int64) + + # Fake per-pixel data + image = rng.uniform(80, 150, size=(n_events, n_pixels)).astype(np.float32) + image_mask = rng.integers(0, 2, size=(n_events, n_pixels), dtype=bool) + peak_time = rng.normal(5.0, 0.5, size=(n_events, n_pixels)).astype(np.float32) + + image_table = Table() + image_table["obs_id"] = np.full(n_events, obs_id, dtype=np.int64) + image_table["event_id"] = event_ids + image_table["tel_id"] = np.full(n_events, tel_id, dtype=np.int16) + image_table.add_column( + Column(image, name="image", dtype=np.float32, shape=(n_pixels,)) + ) + image_table.add_column( + Column(image_mask, name="image_mask", dtype=bool, shape=(n_pixels,)) + ) + image_table.add_column( + Column(peak_time, name="peak_time", dtype=np.float32, shape=(n_pixels,)) + ) + + # DL1 parameter columns required by LST1PredictionTool + parameter_table = Table() + parameter_table["obs_id"] = np.full(n_events, obs_id, dtype=np.int64) + parameter_table["event_id"] = event_ids + parameter_table["tel_id"] = np.full(n_events, tel_id, dtype=np.int16) + parameter_table["intensity"] = rng.uniform(90, 140, size=n_events) + parameter_table["x"] = rng.normal(0.0, 0.05, size=n_events) + parameter_table["y"] = rng.normal(0.0, 0.05, size=n_events) + parameter_table["phi"] = rng.uniform(-np.pi, np.pi, size=n_events) + parameter_table["psi"] = rng.uniform(-np.pi, np.pi, size=n_events) + parameter_table["length"] = rng.uniform(0.05, 0.15, size=n_events) + parameter_table["length_uncertainty"] = rng.uniform(0.001, 0.003, size=n_events) + parameter_table["width"] = rng.uniform(0.02, 0.08, size=n_events) + parameter_table["width_uncertainty"] = rng.uniform(0.001, 0.003, size=n_events) + parameter_table["skewness"] = rng.normal(0.0, 0.2, size=n_events) + parameter_table["kurtosis"] = rng.normal(0.0, 0.2, size=n_events) + parameter_table["time_gradient"] = rng.normal(0.0, 0.01, size=n_events) + parameter_table["intercept"] 
= rng.normal(0.0, 0.01, size=n_events) + parameter_table["n_pixels"] = np.full(n_events, n_pixels, dtype=np.int16) + parameter_table["n_islands"] = np.zeros(n_events, dtype=np.int16) + parameter_table["event_type"] = np.full(n_events, 32, dtype=np.int16) + parameter_table["az_tel"] = np.full(n_events, 1.0) + parameter_table["alt_tel"] = np.full(n_events, 1.2) + parameter_table["dragon_time"] = np.linspace(1_700_000_000, 1_700_000_300, n_events) + + # Write to DL1 file the subarray description, image and parameter tables + subarray.to_hdf(output_path, overwrite=True) + write_table( + image_table, + output_path, + "/dl1/event/telescope/image/LST_LSTCam", + overwrite=True, + ) + write_table( + parameter_table, + output_path, + "/dl1/event/telescope/parameters/LST_LSTCam", + overwrite=True, + ) + return output_path + + @pytest.fixture(scope="session") -def dl1_gamma_file(dl1_tmp_path, prod5_gamma_simtel_path): +def mock_lst1_dl1_file(tmp_path_factory): + """Path to a session-scoped mock LST-1 DL1 HDF5 file for tests.""" + + output = tmp_path_factory.mktemp("mock_lst1") / "mock_lst1_dl1.h5" + return _create_mock_lst1_dl1_file(output) + + +@pytest.fixture(scope="session") +def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): """ DL1 file containing both images and parameters from a gamma simulation set. """ from ctapipe.tools.process import ProcessorTool output = dl1_tmp_path / "gamma.dl1.h5" + argv = [ + f"--input={gamma_simtel_path}", + f"--output={output}", + "--write-images", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", + ] + assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 + return output + + +@pytest.fixture(scope="session") +def dl1_proton_file(dl1_tmp_path, proton_simtel_path): + """ + DL1 file containing both images and parameters from a proton simulation set. + """ + from ctapipe.tools.process import ProcessorTool + output = dl1_tmp_path / "proton.dl1.h5" argv = [ - f"--input={prod5_gamma_simtel_path}", + f"--input={proton_simtel_path}", f"--output={output}", "--write-images", - "--DataWriter.Contact.name=αℓℓ the äüöß", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", ] assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output + @pytest.fixture(scope="session") -def r1_gamma_file(r1_tmp_path, prod5_gamma_simtel_path): +def r1_gamma_file(r1_tmp_path, gamma_simtel_path): """ R1 file containing both waveforms and parameters from a gamma simulation set. """ @@ -49,11 +173,272 @@ def r1_gamma_file(r1_tmp_path, prod5_gamma_simtel_path): output = r1_tmp_path / "gamma.r1.h5" + allowed_tels = [1, 2] argv = [ - f"--input={prod5_gamma_simtel_path}", + f"--input={gamma_simtel_path}", f"--output={output}", f"--DataWriter.write_r1_waveforms=True", - "--DataWriter.Contact.name=αℓℓ the äüöß", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", + f"--SimTelEventSource.allowed_tels={allowed_tels}", ] assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 - return output \ No newline at end of file + return output + + +@pytest.fixture(scope="session") +def r1_proton_file(r1_tmp_path, proton_simtel_path): + """ + R1 file containing both waveforms and parameters from a proton simulation set. 
+ """ + from ctapipe.tools.process import ProcessorTool + + # Restrict to two LSTs for R1 tests to reduce computational load + allowed_tels = [1, 2] + output = r1_tmp_path / "proton.r1.h5" + argv = [ + f"--input={proton_simtel_path}", + f"--output={output}", + f"--DataWriter.write_r1_waveforms=True", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", + f"--SimTelEventSource.allowed_tels={allowed_tels}", + ] + assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 + return output + + +@pytest.fixture(scope="session") +def ctlearn_trained_r1_mono_models(r1_gamma_file, r1_proton_file, tmp_path_factory): + """ + Test training CTLearn model using the R1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. + """ + tmp_path = tmp_path_factory.mktemp("ctlearn_mono_models") + + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_r1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_r1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy R1 gamma file to the signal directory + shutil.copy(r1_gamma_file, signal_dir) + # Hardcopy R1 proton file to the background directory + shutil.copy(r1_proton_file, background_dir) + + # Configuration to disable quality cuts to increase + # training statistics mainly needed for LSTs. + config = Config( + { + "TableQualityQuery": { + "quality_criteria": [], + }, + }, + ) + + # Restrict to two LSTs for R1 tests to reduce computational load + telescope_type = "LST" + # Loop over reconstruction tasks and train models for each combination + ctlearn_trained_r1_mono_models = {} + for reco_task in ["type", "energy", "cameradirection"]: + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{telescope_type}_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.r1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=1", + "--TrainCTLearnModel.batch_size=2", + "--TrainCTLearnModel.dl1dh_reader_type=DLWaveformReader", + "--DLWaveformReader.sequence_length=5", + "--DLWaveformReader.focal_length_choice=EQUIVALENT", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend( + [ + f"--background={background_dir}", + "--pattern-background=*.r1.h5", + "--DLWaveformReader.enforce_subarray_equality=False", + ] + ) + + # Run training + assert run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 + + ctlearn_trained_r1_mono_models[f"{telescope_type}_{reco_task}"] = ( + output_dir / "ctlearn_model.keras" + ) + # Check that the trained model exists + assert ctlearn_trained_r1_mono_models[f"{telescope_type}_{reco_task}"].exists() + return ctlearn_trained_r1_mono_models + + +@pytest.fixture(scope="session") +def ctlearn_trained_dl1_mono_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory): + """ + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. 
+ """ + tmp_path = tmp_path_factory.mktemp("ctlearn_mono_models") + + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_dl1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_dl1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy DL1 gamma file to the signal directory + shutil.copy(dl1_gamma_file, signal_dir) + # Hardcopy DL1 proton file to the background directory + shutil.copy(dl1_proton_file, background_dir) + + # Configuration to disable quality cuts to increase + # training statistics mainly needed for LSTs. + config = Config( + { + "TableQualityQuery": { + "quality_criteria": [], + }, + }, + ) + + # Define telescope types and their allowed telescopes + telescope_types = { + "LST": [1, 2], + "MST": [7, 13, 15, 16, 17, 19], + # "SST": [30, 37, 43, 44, 53], + } + image_mapper_types = { + "LST": "BilinearMapper", + "MST": "OversamplingMapper", + # "SST": "SquareMapper", + } + # Loop over telescope types and reconstruction tasks + # and train models for each combination + ctlearn_trained_dl1_mono_models = {} + for telescope_type, allowed_tels in telescope_types.items(): + for reco_task in ["type", "energy", "cameradirection"]: + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{telescope_type}_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=1", + "--TrainCTLearnModel.batch_size=2", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend( + [ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + f"--DLImageReader.image_mapper_type={image_mapper_types[telescope_type]}", + ] + ) + + # Run training + assert ( + run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 + ) + + ctlearn_trained_dl1_mono_models[f"{telescope_type}_{reco_task}"] = ( + output_dir / "ctlearn_model.keras" + ) + # Check that the trained model exists + assert ctlearn_trained_dl1_mono_models[ + f"{telescope_type}_{reco_task}" + ].exists() + return ctlearn_trained_dl1_mono_models + + +@pytest.fixture(scope="session") +def ctlearn_trained_dl1_stereo_models( + dl1_gamma_file, dl1_proton_file, tmp_path_factory +): + """ + Test training CTLearn model using the R1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. + """ + tmp_path = tmp_path_factory.mktemp("ctlearn_stereo_models") + + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_dl1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_dl1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy DL1 gamma file to the signal directory + shutil.copy(dl1_gamma_file, signal_dir) + # Hardcopy DL1 proton file to the background directory + shutil.copy(dl1_proton_file, background_dir) + + # Configuration to disable quality cuts to increase + # training statistics mainly needed for LSTs. 
+ config = Config( + { + "TableQualityQuery": { + "quality_criteria": [], + }, + }, + ) + + # Restrict to three MSTs for DL1 tests to reduce computational load + telescope_type = "MST" + allowed_tels = [7, 13, 15] + + # Loop over reconstruction tasks and train models for each combination + ctlearn_trained_dl1_stereo_models = {} + for reco_task in ["type", "energy", "skydirection"]: + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{telescope_type}_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=1", + "--TrainCTLearnModel.batch_size=2", + "--TrainCTLearnModel.stack_telescope_images=True", + "--DLImageReader.mode=stereo", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend( + [ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + ] + ) + + # Run training + assert run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 + + ctlearn_trained_dl1_stereo_models[f"{telescope_type}_{reco_task}"] = ( + output_dir / "ctlearn_model.keras" + ) + # Check that the trained model exists + assert ctlearn_trained_dl1_stereo_models[ + f"{telescope_type}_{reco_task}" + ].exists() + return ctlearn_trained_dl1_stereo_models diff --git a/ctlearn/core/loader.py b/ctlearn/core/loader.py index 5b723f9c..92fd96c2 100644 --- a/ctlearn/core/loader.py +++ b/ctlearn/core/loader.py @@ -155,7 +155,7 @@ def _get_mono_item(self, batch): """ # Retrieve the telescope images and store in the features dictionary labels = {} - features = {"input": batch["features"].data} + features = batch["features"].data if "type" in self.tasks: labels["type"] = to_categorical( batch["true_shower_primary_class"].data, @@ -186,9 +186,6 @@ def _get_mono_item(self, batch): ), axis=1, ) - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - features = features["input"] return features, labels def _get_stereo_item(self, batch): @@ -303,13 +300,10 @@ def _get_stereo_item(self, batch): ) # Store the fatures in the features dictionary if "features" in batch.colnames: - features = {"input": np.array(features)} + features = np.array(features) # TDOO: Add support for both feature vectors if "mono_feature_vectors" in batch.colnames: - features = {"input": np.array(mono_feature_vectors)} + features = np.array(mono_feature_vectors) if "stereo_feature_vectors" in batch.colnames: - features = {"input": np.array(stereo_feature_vectors)} - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - features = features["input"] + features = np.array(stereo_feature_vectors) return features, labels diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py index 07c79d46..bfac48c2 100644 --- a/ctlearn/core/model.py +++ b/ctlearn/core/model.py @@ -288,7 +288,7 @@ def _build_backbone(self, input_shape): """ # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Get model arcihtecture parameters for the backbone filters_list = [layer["filters"] for layer in self.architecture] kernel_sizes = [layer["kernel_size"] for layer in self.architecture] @@ -472,7 +472,7 @@ def 
_build_backbone(self, input_shape): Keras input layer object for the backbone model. """ # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Apply initial padding if specified if self.init_padding > 0: network_input = keras.layers.ZeroPadding2D( @@ -880,7 +880,7 @@ def _build_backbone(self, input_shape): """ # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Set the backbone model to be trainable or not for layer in self.model.layers: if layer.name.endswith("_block"): diff --git a/ctlearn/core/tests/test_loader.py b/ctlearn/core/tests/test_loader.py index 97f87172..7fe71900 100644 --- a/ctlearn/core/tests/test_loader.py +++ b/ctlearn/core/tests/test_loader.py @@ -1,17 +1,17 @@ -import pytest from traitlets.config.loader import Config from dl1_data_handler.reader import DLImageReader from ctlearn.core.loader import DLDataLoader -def test_data_loader(dl1_tmp_path, dl1_gamma_file): +def test_data_loader(dl1_gamma_file): """check""" # Create a configuration suitable for the test config = Config( { "DLImageReader": { "allowed_tels": [4], + "focal_length_choice": "EQUIVALENT", }, } ) @@ -34,4 +34,4 @@ def test_data_loader(dl1_tmp_path, dl1_gamma_file): and "skydirection" in labels ) # Check the shape of the features - assert features["input"].shape == (1, 110, 110, 2) + assert features.shape == (1, 110, 110, 2) diff --git a/ctlearn/tools/__init__.py b/ctlearn/tools/__init__.py index 996469a1..d54df32d 100644 --- a/ctlearn/tools/__init__.py +++ b/ctlearn/tools/__init__.py @@ -1,2 +1,13 @@ """ctlearn command line tools. """ + +from .train_model import TrainCTLearnModel +from .predict_LST1 import LST1PredictionTool +from .predict_model import MonoPredictCTLearnModel, StereoPredictCTLearnModel + +__all__ = [ + "TrainCTLearnModel", + "LST1PredictionTool", + "MonoPredictCTLearnModel", + "StereoPredictCTLearnModel" +] \ No newline at end of file diff --git a/ctlearn/tools/predict_LST1.py b/ctlearn/tools/predict_LST1.py index e27e68c6..58069e56 100644 --- a/ctlearn/tools/predict_LST1.py +++ b/ctlearn/tools/predict_LST1.py @@ -6,8 +6,7 @@ import tables import keras from astropy import units as u -from astropy.coordinates.earth import EarthLocation -from astropy.coordinates import AltAz, Angle, SkyCoord +from astropy.coordinates import AltAz, SkyCoord from astropy.table import Table, join, setdiff, vstack from astropy.time import Time @@ -27,26 +26,36 @@ flag, CaselessStrEnum, ComponentName, - Unicode, + Dict, UseEnum, classes_with_traits, ) from ctapipe.instrument.optics import FocalLengthKind from ctapipe.io import read_table, write_table +from ctapipe.io.hdf5dataformat import ( + DL1_SUBARRAY_TRIGGER_TABLE, + DL1_TEL_GROUP, + DL1_TEL_PARAMETERS_GROUP, + DL1_TEL_POINTING_GROUP, + DL1_TEL_TRIGGER_TABLE, + DL2_TEL_PARTICLETYPE_GROUP, + DL2_TEL_ENERGY_GROUP, + DL2_TEL_GEOMETRY_GROUP, + DL2_SUBARRAY_PARTICLETYPE_GROUP, + DL2_SUBARRAY_ENERGY_GROUP, + DL2_SUBARRAY_GEOMETRY_GROUP, +) from ctapipe.reco.utils import add_defaults_and_meta -from ctlearn.core.model import LoadedModel -from ctlearn.utils import get_lst1_subarray_description +from ctlearn import __version__ as ctlearn_version +from ctlearn.utils import get_lst1_subarray_description, validate_trait_dict + from dl1_data_handler.image_mapper import ImageMapper from dl1_data_handler.reader import ( get_unmapped_image, TableQualityQuery, ) 
-POINTING_GROUP = "/dl1/monitoring/telescope/pointing" -DL1_TELESCOPE_GROUP = "/dl1/event/telescope" -DL2_TELESCOPE_GROUP = "/dl2/event/telescope" -DL2_SUBARRAY_GROUP = "/dl2/event/subarray" SUBARRAY_EVENT_KEYS = ["obs_id", "event_id"] TELESCOPE_EVENT_KEYS = ["obs_id", "event_id", "tel_id"] @@ -111,16 +120,24 @@ class LST1PredictionTool(Tool): help="Set whether to include dl2 subarray-event-wise data in the output file.", ).tag(config=True) - prefix = Unicode( - default_value="CTLearn", + prefixes = Dict( + default_value={ + "type": "CTLearnClassifier", + "energy": "CTLearnRegressor", + "cameradirection": "CTLearnCameraReconstructor", + "all": "CTLearn", + }, allow_none=False, - help="Name of the reconstruction algorithm used to generate the dl2 data.", + help=( + "Name of the reconstruction algorithm used " + "to generate the dl2 data for each task." + ), ).tag(config=True) load_type_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) " + "Path to a Keras model file (Keras3) " "for the classification of the primary particle type." ), allow_none=True, @@ -132,7 +149,7 @@ class LST1PredictionTool(Tool): load_energy_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) " + "Path to a Keras model file (Keras3) " "for the regression of the primary particle energy." ), allow_none=True, @@ -144,7 +161,7 @@ class LST1PredictionTool(Tool): load_cameradirection_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) " + "Path to a Keras model file (Keras3) " "for the regression of the primary particle arrival direction " "based on the camera coordinate offsets." ), @@ -260,6 +277,11 @@ class LST1PredictionTool(Tool): classes = classes_with_traits(ImageMapper) def setup(self): + self.log.info("ctlearn version %s", ctlearn_version) + # Validate the prefixes trait dictionary + validate_trait_dict( + self.prefixes, ["type", "energy", "cameradirection", "all"] + ) # Save dl1 image and parameters tree schemas and tel id for easy access self.image_table_path = "/dl1/event/telescope/image/LST_LSTCam" self.parameter_table_name = "/dl1/event/telescope/parameters/LST_LSTCam" @@ -371,13 +393,13 @@ def start(self): write_table( pointing_table, self.output_path, - f"{POINTING_GROUP}/tel_{self.tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL1 telescope pointing table was stored in '%s' under '%s'", self.output_path, - f"{POINTING_GROUP}/tel_{self.tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{self.tel_id:03d}", ) # Set the time format to MJD since in the other table we store the time in MJD time.format = "mjd" @@ -390,13 +412,13 @@ def start(self): write_table( trigger_table, self.output_path, - "/dl1/event/telescope/trigger", + DL1_TEL_TRIGGER_TABLE, overwrite=self.overwrite, ) self.log.info( "DL1 telescope trigger table was stored in '%s' under '%s'", self.output_path, - "/dl1/event/telescope/trigger", + DL1_TEL_TRIGGER_TABLE, ) trigger_table.keep_columns(["obs_id", "event_id", "time"]) trigger_table.add_column( @@ -407,13 +429,13 @@ def start(self): write_table( trigger_table, self.output_path, - "/dl1/event/subarray/trigger", + DL1_SUBARRAY_TRIGGER_TABLE, overwrite=self.overwrite, ) self.log.info( "DL1 subarray trigger table was stored in '%s' under '%s'", self.output_path, - "/dl1/event/subarray/trigger", + DL1_SUBARRAY_TRIGGER_TABLE, ) # Create the dl1 parameters table 
parameter_table.rename_column("intensity", "hillas_intensity") @@ -457,13 +479,13 @@ def start(self): write_table( parameter_table, self.output_path, - f"/dl1/event/telescope/parameters/tel_{self.tel_id:03d}", + f"{DL1_TEL_PARAMETERS_GROUP}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL1 parameters table was stored in '%s' under '%s'", self.output_path, - f"/dl1/event/telescope/parameters/tel_{self.tel_id:03d}", + f"{DL1_TEL_PARAMETERS_GROUP}/tel_{self.tel_id:03d}", ) # Add additional columns to the parameter table @@ -513,10 +535,7 @@ def start(self): # Get the unmapped image image = get_unmapped_image(event, self.channels, self.transforms) data.append(self.image_mapper.map_image(image)) - input_data = {"input": np.array(data)} - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - input_data = input_data["input"] + input_data = np.array(data) event_id.extend(dl1_table["event_id"].data) tel_azimuth.extend(dl1_table["tel_az"].data) @@ -558,27 +577,27 @@ def start(self): if self.load_type_model_from is not None: classification_tel_table = example_identifiers.copy() classification_tel_table.add_column( - prediction, name=f"{self.prefix}_tel_prediction" + prediction, name=f"{self.prefixes['type']}_tel_prediction" ) # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: nan_table = self._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_prediction"], + columns=[f"{self.prefixes['type']}_tel_prediction"], shapes=[(len(nonexample_identifiers),)], ) classification_tel_table = vstack([classification_tel_table, nan_table]) classification_tel_table.sort(TELESCOPE_EVENT_KEYS) - classification_is_valid = ~np.isnan(classification_tel_table[f"{self.prefix}_tel_prediction"].data, dtype=bool) + classification_is_valid = ~np.isnan(classification_tel_table[f"{self.prefixes['type']}_tel_prediction"].data, dtype=bool) classification_tel_table.add_column( classification_is_valid, - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['type']}_tel_is_valid", ) # Add the default values and meta data to the table add_defaults_and_meta( classification_tel_table, ParticleClassificationContainer, - prefix=self.prefix, + prefix=self.prefixes['type'], add_tel_prefix=True, ) # Save the prediction to the output file @@ -586,13 +605,13 @@ def start(self): write_table( classification_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefixes['type']}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefixes['type']}/tel_{self.tel_id:03d}", ) # Write the mono telescope prediction to the subarray prediction table if self.dl2_subarray: @@ -604,19 +623,19 @@ def start(self): colname, colname.replace("_tel", "") ) classification_subarray_table.add_column( - [[val] for val in classification_is_valid], name=f"{self.prefix}_telescopes" + [[val] for val in classification_is_valid], name=f"{self.prefixes['type']}_telescopes" ) # Save the prediction to the output file write_table( classification_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction 
data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", ) # Adding the feature vectors for the classification if self.dl1_features: @@ -625,10 +644,10 @@ def start(self): ) feature_vector_table.add_column( classification_fvs, - name=f"{self.prefix}_tel_classification_feature_vectors", + name=f"{self.prefixes['all']}_tel_classification_feature_vectors", ) if nonexample_identifiers is not None: - fvs_columns_list.append(f"{self.prefix}_tel_classification_feature_vectors") + fvs_columns_list.append(f"{self.prefixes['all']}_tel_classification_feature_vectors") fvs_shapes_list.append( ( len(nonexample_identifiers), @@ -640,26 +659,26 @@ def start(self): # Convert the reconstructed energy from log10(TeV) to TeV reco_energy = u.Quantity(np.power(10, np.squeeze(energy)), unit=u.TeV) # Add the reconstructed energy to the prediction table - energy_tel_table.add_column(reco_energy, name=f"{self.prefix}_tel_energy") + energy_tel_table.add_column(reco_energy, name=f"{self.prefixes['energy']}_tel_energy") # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: nan_table = self._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_energy"], + columns=[f"{self.prefixes['energy']}_tel_energy"], shapes=[(len(nonexample_identifiers),)], ) energy_tel_table = vstack([energy_tel_table, nan_table]) energy_tel_table.sort(TELESCOPE_EVENT_KEYS) - energy_is_valid = ~np.isnan(energy_tel_table[f"{self.prefix}_tel_energy"].data, dtype=bool) + energy_is_valid = ~np.isnan(energy_tel_table[f"{self.prefixes['energy']}_tel_energy"].data, dtype=bool) energy_tel_table.add_column( energy_is_valid, - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['energy']}_tel_is_valid", ) # Add the default values and meta data to the table add_defaults_and_meta( energy_tel_table, ReconstructedEnergyContainer, - prefix=self.prefix, + prefix=self.prefixes['energy'], add_tel_prefix=True, ) # Save the prediction to the output file @@ -667,13 +686,13 @@ def start(self): write_table( energy_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_ENERGY_GROUP}/{self.prefixes['energy']}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_ENERGY_GROUP}/{self.prefixes['energy']}/tel_{self.tel_id:03d}", ) # Write the mono telescope prediction to the subarray prediction table if self.dl2_subarray: @@ -685,19 +704,19 @@ def start(self): colname, colname.replace("_tel", "") ) energy_subarray_table.add_column( - [[val] for val in energy_is_valid], name=f"{self.prefix}_telescopes" + [[val] for val in energy_is_valid], name=f"{self.prefixes['energy']}_telescopes" ) # Save the prediction to the output file write_table( energy_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) # Adding the feature vectors for the energy regression if self.dl1_features: @@ -706,10 +725,10 @@ def start(self): ) feature_vector_table.add_column( energy_fvs, - 
name=f"{self.prefix}_tel_energy_feature_vectors", + name=f"{self.prefixes['all']}_tel_energy_feature_vectors", ) if nonexample_identifiers is not None: - fvs_columns_list.append(f"{self.prefix}_tel_energy_feature_vectors") + fvs_columns_list.append(f"{self.prefixes['all']}_tel_energy_feature_vectors") fvs_shapes_list.append( ( len(nonexample_identifiers), @@ -750,34 +769,34 @@ def start(self): reco_direction = cam_coord_offset.transform_to(altaz) # Add the reconstructed direction (az, alt) to the prediction table direction_tel_table.add_column( - reco_direction.az.to(u.deg), name=f"{self.prefix}_tel_az" + reco_direction.az.to(u.deg), name=f"{self.prefixes['cameradirection']}_tel_az" ) direction_tel_table.add_column( - reco_direction.alt.to(u.deg), name=f"{self.prefix}_tel_alt" + reco_direction.alt.to(u.deg), name=f"{self.prefixes['cameradirection']}_tel_alt" ) # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: nan_table = self._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_az", f"{self.prefix}_tel_alt"], + columns=[f"{self.prefixes['cameradirection']}_tel_az", f"{self.prefixes['cameradirection']}_tel_alt"], shapes=[(len(nonexample_identifiers),), (len(nonexample_identifiers),)], ) direction_tel_table = vstack([direction_tel_table, nan_table]) direction_tel_table.keep_columns( TELESCOPE_EVENT_KEYS - + [f"{self.prefix}_tel_az", f"{self.prefix}_tel_alt"] + + [f"{self.prefixes['cameradirection']}_tel_az", f"{self.prefixes['cameradirection']}_tel_alt"] ) direction_tel_table.sort(TELESCOPE_EVENT_KEYS) - direction_is_valid = ~np.isnan(direction_tel_table[f"{self.prefix}_tel_az"].data, dtype=bool) + direction_is_valid = ~np.isnan(direction_tel_table[f"{self.prefixes['cameradirection']}_tel_az"].data, dtype=bool) direction_tel_table.add_column( direction_is_valid, - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['cameradirection']}_tel_is_valid", ) # Add the default values and meta data to the table add_defaults_and_meta( direction_tel_table, ReconstructedGeometryContainer, - prefix=self.prefix, + prefix=self.prefixes['cameradirection'], add_tel_prefix=True, ) # Save the prediction to the output file @@ -785,13 +804,13 @@ def start(self): write_table( direction_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}/tel_{self.tel_id:03d}", ) # Write the mono telescope prediction to the subarray prediction table if self.dl2_subarray: @@ -803,19 +822,19 @@ def start(self): colname, colname.replace("_tel", "") ) direction_subarray_table.add_column( - [[val] for val in direction_is_valid], name=f"{self.prefix}_telescopes" + [[val] for val in direction_is_valid], name=f"{self.prefixes['cameradirection']}_telescopes" ) # Save the prediction to the output file write_table( direction_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + 
f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}", ) # Adding the feature vectors for the arrival direction regression if self.dl1_features: @@ -824,10 +843,10 @@ def start(self): ) feature_vector_table.add_column( direction_fvs, - name=f"{self.prefix}_tel_direction_feature_vectors", + name=f"{self.prefixes['all']}_tel_direction_feature_vectors", ) if nonexample_identifiers is not None: - fvs_columns_list.append(f"{self.prefix}_tel_direction_feature_vectors") + fvs_columns_list.append(f"{self.prefixes['all']}_tel_direction_feature_vectors") fvs_shapes_list.append( ( len(nonexample_identifiers), @@ -850,19 +869,19 @@ def start(self): # Add is_valid column to the feature vector table feature_vector_table.add_column( is_valid_col, - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['all']}_tel_is_valid", ) # Save the prediction to the output file write_table( feature_vector_table, self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL1_TEL_GROUP}/features/{self.prefixes['all']}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL1 feature vectors was stored in '%s' under '%s'", self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL1_TEL_GROUP}/features/{self.prefixes['all']}/tel_{self.tel_id:03d}", ) def finish(self): @@ -892,7 +911,7 @@ def _split_model(self, model): backbone = model.get_layer(index=1) # Create a new head model with the same layers as the original model. # The output of the backbone model is the input of the head model. - backbone_output_shape = keras.Input(model.layers[2].input_shape[1:]) + backbone_output_shape = keras.Input(model.layers[2].input.shape[1:]) x = backbone_output_shape for layer in model.layers[2:]: x = layer(x) diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py index 0eaa8b23..dba16638 100644 --- a/ctlearn/tools/predict_model.py +++ b/ctlearn/tools/predict_model.py @@ -3,9 +3,11 @@ """ import atexit -import pathlib +import uuid +import warnings + import numpy as np -import os +import tables import tensorflow as tf import keras @@ -20,6 +22,7 @@ setdiff, unique, ) +from astropy.time import Time from ctapipe.containers import ( ParticleClassificationContainer, @@ -34,16 +37,46 @@ Int, Path, flag, - Set, Dict, - List, - CaselessStrEnum, ComponentName, - Unicode, classes_with_traits, ) from ctapipe.monitoring.interpolation import PointingInterpolator +from ctapipe.instrument import SubarrayDescription from ctapipe.io import read_table, write_table, HDF5Merger +from ctapipe.io.datalevels import DataLevel +from ctapipe.io.hdf5dataformat import ( + DL0_TEL_POINTING_GROUP, + DL1_SUBARRAY_GROUP, + DL1_SUBARRAY_POINTING_GROUP, + DL1_SUBARRAY_TRIGGER_TABLE, + DL1_TEL_GROUP, + DL1_TEL_CALIBRATION_GROUP, + DL1_TEL_ILLUMINATOR_THROUGHPUT_GROUP, + DL1_TEL_IMAGES_GROUP, + DL1_TEL_MUON_GROUP, + DL1_TEL_MUON_THROUGHPUT_GROUP, + DL1_TEL_OPTICAL_PSF_GROUP, + DL1_TEL_PARAMETERS_GROUP, + DL1_TEL_POINTING_GROUP, + DL1_TEL_TRIGGER_TABLE, + DL2_EVENT_STATISTICS_GROUP, + FIXED_POINTING_GROUP, + R0_TEL_GROUP, + R1_TEL_GROUP, + SIMULATION_IMAGES_GROUP, + SIMULATION_IMPACT_GROUP, + SIMULATION_PARAMETERS_GROUP, + SIMULATION_RUN_TABLE, + SIMULATION_SHOWER_TABLE, + DL2_TEL_PARTICLETYPE_GROUP, + DL2_TEL_ENERGY_GROUP, + DL2_TEL_GEOMETRY_GROUP, + DL2_SUBARRAY_GROUP, + DL2_SUBARRAY_PARTICLETYPE_GROUP, + DL2_SUBARRAY_ENERGY_GROUP, + DL2_SUBARRAY_GEOMETRY_GROUP, +) from ctapipe.reco.reconstructor import ReconstructionProperty from 
ctapipe.reco.stereo_combination import StereoCombiner from ctapipe.reco.utils import add_defaults_and_meta @@ -52,24 +85,45 @@ ProcessType, LST_EPOCH, ) +from ctlearn import __version__ as ctlearn_version from ctlearn.core.loader import DLDataLoader +from ctlearn.utils import validate_trait_dict -SIMULATION_CONFIG_TABLE = "/configuration/simulation/run" -FIXED_POINTING_GROUP = "/configuration/telescope/pointing" -POINTING_GROUP = "/dl1/monitoring/telescope/pointing" -SUBARRAY_POINTING_GROUP = "/dl1/monitoring/subarray/pointing" -DL1_TELESCOPE_GROUP = "/dl1/event/telescope" -DL1_SUBARRAY_GROUP = "/dl1/event/subarray" -DL2_SUBARRAY_GROUP = "/dl2/event/subarray" -DL2_TELESCOPE_GROUP = "/dl2/event/telescope" +# Convenient constants for column names and table keys +CONFIG_INSTRUMENT_SUBARRAY_LAYOUT = "/configuration/instrument/subarray/layout" +CONFIG_INSTRUMENT_TEL = "/configuration/instrument/telescope" +CONFIG_INSTRUMENT_TEL_CAMERA = "/configuration/instrument/telescope/camera" SUBARRAY_EVENT_KEYS = ["obs_id", "event_id"] -TELESCOPE_EVENT_KEYS = ["obs_id", "event_id", "tel_id"] - -__all__ = [ - "PredictCTLearnModel", - "MonoPredictCTLearnModel", - "StereoPredictCTLearnModel", +TEL_EVENT_KEYS = ["obs_id", "event_id", "tel_id"] +TEL_ITER_GROUPS = [ + R0_TEL_GROUP, + R1_TEL_GROUP, + FIXED_POINTING_GROUP, + DL0_TEL_POINTING_GROUP, + DL1_TEL_POINTING_GROUP, + DL1_TEL_CALIBRATION_GROUP, + DL1_TEL_ILLUMINATOR_THROUGHPUT_GROUP, + DL1_TEL_MUON_THROUGHPUT_GROUP, + DL1_TEL_OPTICAL_PSF_GROUP, + DL1_TEL_PARAMETERS_GROUP, + DL1_TEL_IMAGES_GROUP, + DL1_TEL_MUON_GROUP, + SIMULATION_IMAGES_GROUP, + SIMULATION_IMPACT_GROUP, + SIMULATION_PARAMETERS_GROUP, ] +DATALEVEL_TO_GROUP = { + DataLevel.R0: R0_TEL_GROUP, + DataLevel.R1: R1_TEL_GROUP, + DataLevel.DL1_IMAGES: DL1_TEL_IMAGES_GROUP, + DataLevel.DL1_PARAMETERS: DL1_TEL_PARAMETERS_GROUP, + DataLevel.DL1_MUON: DL1_TEL_MUON_GROUP, + DataLevel.DL2: DL2_SUBARRAY_GROUP, +} + + +class CannotPredict(OSError): + """Raised when trying to predict an incompatible file""" class PredictCTLearnModel(Tool): @@ -88,8 +142,6 @@ class PredictCTLearnModel(Tool): ---------- input_url : pathlib.Path Input ctapipe HDF5 files including pixel-wise image or waveform data. - use_HDF5Merger : bool - Set whether to use the HDF5Merger component to copy the selected tables from the input file to the output file. dl1_features : bool Set whether to include the dl1 feature vectors in the output file. dl2_telescope : bool @@ -107,19 +159,17 @@ class PredictCTLearnModel(Tool): prefix : str Name of the reconstruction algorithm used to generate the dl2 data. load_type_model_from : pathlib.Path - Path to a Keras model file (Keras3) or directory (Keras2) for the classification of the primary particle type. + Path to a Keras model file (Keras3) for the classification of the primary particle type. load_energy_model_from : pathlib.Path - Path to a Keras model file (Keras3) or directory (Keras2) for the regression of the primary particle energy. + Path to a Keras model file (Keras3) for the regression of the primary particle energy. load_cameradirection_model_from : pathlib.Path - Path to a Keras model file (Keras3) or directory (Keras2) for the regression + Path to a Keras model file (Keras3) for the regression of the primary particle arrival direction based on camera coordinate offsets.
load_skydirection_model_from : pathlib.Path - Path to a Keras model file (Keras3) or directory (Keras2) for the regression + Path to a Keras model file (Keras3) for the regression of the primary particle arrival direction based on spherical coordinate offsets. output_path : pathlib.Path Output path to save the dl2 prediction results. - overwrite_tables : bool - Overwrite the table in the output file if it exists. keras_verbose : int Verbosity mode of Keras during the prediction. strategy : tf.distribute.Strategy @@ -141,7 +191,7 @@ class PredictCTLearnModel(Tool): Finish the tool. _predict_with_model(model_path) Load and predict with a CTLearn model. - _predict_classification(example_identifiers) + _predict_particletype(example_identifiers) Predict the classification of the primary particle type. _predict_energy(example_identifiers) Predict the energy of the primary particle. @@ -153,11 +203,11 @@ class PredictCTLearnModel(Tool): Transform to camera coordinate offsets w.r.t. the telescope pointing to Alt/Az coordinates. _transform_spher_coord_offsets_to_sky(table) Transform to spherical coordinate offsets w.r.t. the telescope pointing to Alt/Az coordinates. - _create_nan_table(nonexample_identifiers, columns, shapes) + _create_nan_table(nonexample_identifiers, columns, shapes, reco_task) Create a table with NaNs for missing predictions. _store_pointing(all_identifiers) Store the telescope pointing table from to the output file. - _create_feature_vectors_table(example_identifiers, nonexample_identifiers, classification_feature_vectors, energy_feature_vectors, direction_feature_vectors) + _create_feature_vectors_table(example_identifiers, nonexample_identifiers, particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors) Create the table for the DL1 feature vectors. """ @@ -169,16 +219,6 @@ class PredictCTLearnModel(Tool): file_ok=True, ).tag(config=True) - use_HDF5Merger = Bool( - default_value=True, - allow_none=False, - help=( - "Set whether to use the HDF5Merger component to copy the selected tables " - "from the input file to the output file. CAUTION: This can only be used " - "if the output file not exists." - ), - ).tag(config=True) - dl1_features = Bool( default_value=False, allow_none=False, @@ -219,16 +259,25 @@ class PredictCTLearnModel(Tool): ), ).tag(config=True) - prefix = Unicode( - default_value="CTLearn", + prefixes = Dict( + default_value={ + "type": "CTLearnClassifier", + "energy": "CTLearnRegressor", + "cameradirection": "CTLearnCameraReconstructor", + "skydirection": "CTLearnSkyReconstructor", + "all": "CTLearn", + }, allow_none=False, - help="Name of the reconstruction algorithm used to generate the dl2 data.", + help=( + "Name of the reconstruction algorithm used " + "to generate the dl2 data for each task." + ), ).tag(config=True) load_type_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) for the classification " + "Path to a Keras model file (Keras3) for the classification " "of the primary particle type." ), allow_none=True, @@ -240,7 +289,7 @@ class PredictCTLearnModel(Tool): load_energy_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) for the regression " + "Path to a Keras model file (Keras3) for the regression " "of the primary particle energy." 
), allow_none=True, @@ -252,7 +301,7 @@ class PredictCTLearnModel(Tool): load_cameradirection_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) for the regression " + "Path to a Keras model file (Keras3) for the reconstruction " "of the primary particle arrival direction based on camera coordinate offsets." ), allow_none=True, @@ -264,7 +313,7 @@ class PredictCTLearnModel(Tool): load_skydirection_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) or directory (Keras2) for the regression " + "Path to a Keras model file (Keras3) for the reconstruction " "of the primary particle arrival direction based on spherical coordinate offsets." ), allow_none=True, @@ -285,12 +334,6 @@ class PredictCTLearnModel(Tool): help="Output path to save the dl2 prediction results", ).tag(config=True) - overwrite_tables = Bool( - default_value=True, - allow_none=False, - help="Overwrite the table in the output file if it exists", - ).tag(config=True) - keras_verbose = Int( default_value=1, min=0, @@ -315,6 +358,12 @@ class PredictCTLearnModel(Tool): } flags = { + **flag( + "overwrite", + "HDF5Merger.overwrite", + "Overwrite the output file if it exists", + "Do not overwrite the output file if it exists", + ), **flag( "dl1-features", "PredictCTLearnModel.dl1_features", @@ -330,14 +379,8 @@ class PredictCTLearnModel(Tool): **flag( "dl2-subarray", "PredictCTLearnModel.dl2_subarray", - "Include dl2 telescope-event-wise data in the output file", - "Exclude dl2 telescope-event-wise data in the output file", - ), - **flag( - "use-HDF5Merger", - "PredictCTLearnModel.use_HDF5Merger", - "Copy data using the HDF5Merger component (CAUTION: This can not be used if the output file already exists)", - "Do not copy data using the HDF5Merger component", + "Include dl2 subarray-event-wise data in the output file", + "Exclude dl2 subarray-event-wise data in the output file", ), **flag( "r0-waveforms", @@ -380,22 +423,18 @@ class PredictCTLearnModel(Tool): classes = classes_with_traits(DLDataReader) def setup(self): - # Check if the ctapipe HDF5Merger component is enabled - if self.use_HDF5Merger: - if os.path.exists(self.output_path): - raise ToolConfigurationError( - f"The output file '{self.output_path}' already exists. Please use " - "'--no-use-HDF5Merger' to disable the usage of the HDF5Merger component." - ) - # Copy selected tables from the input file to the output file - self.log.info("Copying to output destination.") - with HDF5Merger(self.output_path, parent=self) as merger: - merger(self.input_url) - else: - self.log.info( - "No copy to output destination, since the usage of the HDF5Merger component is disabled." - ) - + self.activity_start_time = Time.now() + self.log.info("ctlearn version %s", ctlearn_version) + # Validate the prefixes trait dictionary + validate_trait_dict( + self.prefixes, ["type", "energy", "cameradirection", "skydirection", "all"] + ) + # Copy selected tables from the input file to the output file + self.log.info("Copying to output destination.") + with HDF5Merger( + self.output_path, dl2_subarray=False, dl2_telescope=False, parent=self + ) as merger: + merger(self.input_url) # Create a MirroredStrategy. 
self.strategy = tf.distribute.MirroredStrategy()
atexit.register(self.strategy._extended._collective_ops._lock.locked) # type: ignore
@@ -421,10 +460,228 @@ def setup(self):
self.last_batch_size = len(self.indices) % (
self.batch_size * self.strategy.num_replicas_in_sync
)
+ # Ensure subarray consistency in the output file
+ self._ensure_subarray_consistency()

def finish(self):
+ # Overwrite CTAO reference metadata in the output file
+ self._overwrite_meta()
self.log.info("Tool is shutting down")

+ def _overwrite_meta(self):
+ """Overwrite CTAO metadata in the output file."""
+ # TODO: Upgrade to new CTAO metadata standard when available
+ with warnings.catch_warnings():
+ warnings.simplefilter("ignore", tables.NaturalNameWarning)
+ with tables.open_file(self.output_path, mode="r+") as h5_file:
+ # Update CTA Activity metadata
+ h5_file.root._v_attrs["CTA ACTIVITY ID"] = str(uuid.uuid4())
+ h5_file.root._v_attrs["CTA ACTIVITY NAME"] = self.name
+ h5_file.root._v_attrs["CTA ACTIVITY SOFTWARE NAME"] = "ctlearn"
+ h5_file.root._v_attrs["CTA ACTIVITY SOFTWARE VERSION"] = ctlearn_version
+ h5_file.root._v_attrs["CTA ACTIVITY START TIME"] = (
+ self.activity_start_time.iso
+ )
+ h5_file.root._v_attrs["CTA ACTIVITY STOP TIME"] = Time.now().iso
+ # Update CTA Product metadata
+ h5_file.root._v_attrs["CTA PRODUCT DATA LEVELS"] = (
+ self._get_data_levels(h5_file)
+ )
+ h5_file.root._v_attrs["CTA PRODUCT CREATION TIME"] = (
+ self.activity_start_time.iso
+ )
+ h5_file.root._v_attrs["CTA PRODUCT ID"] = str(uuid.uuid4())
+ h5_file.flush()
+
+ def _get_data_levels(self, h5file):
+ """Get the data levels present in the HDF5 file."""
+ data_levels = {
+ level.name
+ for level, group in DATALEVEL_TO_GROUP.items()
+ if hasattr(h5file.root, group)
+ }
+ return ",".join(sorted(data_levels))
+
+ def _ensure_subarray_consistency(self):
+ """
+ Align subarray metadata and trigger tables with the selected telescopes.
+
+ When only a subset of telescopes is processed, overwrite the output file's
+ SubarrayDescription and trim the DL1 trigger tables to keep events that
+ involve the selected telescopes. Also rebuild the subarray trigger table
+ with the corresponding telescope participation masks.
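`_overwrite_meta()` above writes the CTAO reference metadata as plain attributes on the HDF5 root node via PyTables. A standalone sketch of that pattern, with a placeholder file name and only a subset of the attributes the method actually sets:

```python
# Hedged sketch: update a few CTAO reference-metadata attributes on the
# root node of an existing HDF5 file ("output.dl2.h5" is a placeholder).
import uuid

import tables
from astropy.time import Time

with tables.open_file("output.dl2.h5", mode="r+") as h5_file:
    attrs = h5_file.root._v_attrs
    attrs["CTA ACTIVITY ID"] = str(uuid.uuid4())
    attrs["CTA ACTIVITY SOFTWARE NAME"] = "ctlearn"
    attrs["CTA ACTIVITY STOP TIME"] = Time.now().iso
    h5_file.flush()
```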
+ """ + + input_subarray = SubarrayDescription.from_hdf( + self.input_url, + focal_length_choice=self.dl1dh_reader.focal_length_choice, + ) + if input_subarray == self.dl1dh_reader.subarray: + return + + self.dl1dh_reader.subarray.to_hdf(self.output_path, overwrite=True) + self.log.info("SubarrayDescription was stored in '%s'", self.output_path) + + tel_trigger_table = read_table( + self.output_path, + DL1_TEL_TRIGGER_TABLE, + ) + mask = np.isin(tel_trigger_table["tel_id"], self.dl1dh_reader.tel_ids) + tel_trigger_table = tel_trigger_table[mask] + tel_trigger_table.sort(TEL_EVENT_KEYS) + + write_table( + tel_trigger_table, + self.output_path, + DL1_TEL_TRIGGER_TABLE, + overwrite=True, + ) + + subarray_trigger_table = tel_trigger_table.copy() + subarray_trigger_table.keep_columns( + SUBARRAY_EVENT_KEYS + ["time", "event_type"] + ) + subarray_trigger_table = unique( + subarray_trigger_table, keys=SUBARRAY_EVENT_KEYS + ) + + tel_trigger_groups = tel_trigger_table.group_by(SUBARRAY_EVENT_KEYS) + tel_with_trigger = [] + for tel_trigger in tel_trigger_groups.groups: + tel_with_trigger_mask = np.zeros(len(self.dl1dh_reader.tel_ids), dtype=bool) + tel_with_trigger_mask[ + self.dl1dh_reader.subarray.tel_ids_to_indices(tel_trigger["tel_id"]) + ] = True + tel_with_trigger.append(tel_with_trigger_mask) + + subarray_trigger_table.add_column( + tel_with_trigger, index=-2, name="tels_with_trigger" + ) + + write_table( + subarray_trigger_table, + self.output_path, + DL1_SUBARRAY_TRIGGER_TABLE, + overwrite=True, + ) + # Update the simulation shower table to keep only events present in the subarray trigger table + subarray_trigger_table.keep_columns(SUBARRAY_EVENT_KEYS) + sim_shower_table = read_table( + self.output_path, + SIMULATION_SHOWER_TABLE, + ) + sim_shower_table = join( + sim_shower_table, + subarray_trigger_table, + keys=SUBARRAY_EVENT_KEYS, + join_type="right", + ) + sim_shower_table.sort(SUBARRAY_EVENT_KEYS) + write_table( + sim_shower_table, + self.output_path, + SIMULATION_SHOWER_TABLE, + overwrite=True, + ) + # Delete telescope-specific tables for unselected telescopes + self._delete_unselected_telescope_tables() + + def _delete_unselected_telescope_tables(self): + """ + Delete telescope-specific tables for unselected telescopes from the output file. + + Iterates through all telescope-related groups in the HDF5 file and removes + tables corresponding to telescopes that are not in the selected telescope list. + This ensures the output file only contains data for the telescopes that were + processed. The camera configuration tables are also pruned based on the camera indices. 
+ """ + # Open the HDF5 file to prune the unselected telescope tables and camera configurations + with tables.open_file(self.output_path, mode="r+") as h5_file: + + def prune_group(group, valid_ids): + for table in group._f_iter_nodes("Table"): + idx = int(table._v_name.split("_")[-1]) + if idx not in valid_ids: + table._f_remove() + + # Telescope-specific tables + tel_ids = set(self.dl1dh_reader.tel_ids) + for group_name in TEL_ITER_GROUPS: + group = getattr(h5_file.root, group_name, None) + if group is not None: + prune_group(group, tel_ids) + + # Camera configuration tables + layout_node = getattr(h5_file.root, CONFIG_INSTRUMENT_SUBARRAY_LAYOUT, None) + camera_group = getattr(h5_file.root, CONFIG_INSTRUMENT_TEL_CAMERA, None) + if not (layout_node and camera_group): + return + # layout can be either a Table or a Group containing a Table + layout_table = ( + layout_node + if isinstance(layout_node, tables.Table) + else next(layout_node._f_iter_nodes("Table")) + ) + camera_indices = set(layout_table.col("camera_index")) + prune_group(camera_group, camera_indices) + + def _create_nan_table(self, nonexample_identifiers, columns, shapes, reco_task): + """ + Create a table with NaNs for missing predictions. + + This method creates a table with NaNs for missing predictions for the non-example identifiers. + In stereo mode, the table also a column for the valid telescopes is added with all False values. + + Parameters: + ----------- + nonexample_identifiers : astropy.table.Table + Table containing the non-example identifiers. + columns : list of str + List of column names to create in the table. + shapes : list of shapes + List of shapes for the columns to create in the table. + reco_task : str + Reconstruction task name. + + Returns: + -------- + nan_table : astropy.table.Table + Table containing NaNs for missing predictions. + """ + # Create a table with NaNs for missing predictions + nan_table = nonexample_identifiers.copy() + for column_name, shape in zip(columns, shapes): + nan_table.add_column(np.full(shape, np.nan), name=column_name) + # Add that no telescope is valid for the non-example identifiers in stereo mode + if self.dl1dh_reader.mode == "stereo": + nan_table.add_column( + np.zeros( + (len(nonexample_identifiers), len(self.dl1dh_reader.tel_ids)), + dtype=bool, + ), + name=f"{self.prefixes[reco_task]}_telescopes", + ) + return nan_table + + def deduplicate_first_valid( + self, + table: Table, + keys=("obs_id", "event_id"), + valid_col="CTLearn_is_valid", + ): + """ + Return a deduplicated Astropy Table. + + For each group defined by `keys`, keep the first row where + `valid_col` is True. If none are valid, keep the first row. + """ + + t = table.copy() + + t.sort(list(keys) + [valid_col], reverse=[False] * len(keys) + [True]) + + return unique(t, keys=list(keys), keep="first") + def _predict_with_model(self, model_path): """ Load and predict with a CTLearn model. @@ -435,7 +692,7 @@ def _predict_with_model(self, model_path): Parameters ---------- model_path : str - Path to a Keras model file (Keras3) or directory (Keras2). + Path to a Keras model file (Keras3). Returns ------- @@ -482,15 +739,24 @@ def _predict_with_model(self, model_path): backbone_model = model.get_layer(index=1) # Create a new head model with the same layers as the original model. # The output of the backbone model is the input of the head model. 
- backbone_output_shape = keras.Input(model.layers[2].input_shape[1:]) + backbone_output_shape = keras.Input(model.layers[2].input.shape[1:]) x = backbone_output_shape for layer in model.layers[2:]: x = layer(x) head = keras.Model(inputs=backbone_output_shape, outputs=x) # Apply the backbone model with the data loader to retrieve the feature vectors - feature_vectors = backbone_model.predict( - data_loader, verbose=self.keras_verbose - ) + try: + feature_vectors = backbone_model.predict( + data_loader, verbose=self.keras_verbose + ) + except ValueError as err: + if str(err).startswith("Input 0 of layer"): + raise ToolConfigurationError( + "Model input shape does not match the prediction data. " + "This is usually caused by selecting the wrong telescope_id. " + "Please ensure the telescope configuration matches the one used for training." + ) from err + raise # Apply the head model with the feature vectors to retrieve the prediction predict_data = Table( { @@ -522,7 +788,16 @@ def _predict_with_model(self, model_path): ) else: # Predict the data using the loaded model - predict_data = model.predict(data_loader, verbose=self.keras_verbose) + try: + predict_data = model.predict(data_loader, verbose=self.keras_verbose) + except ValueError as err: + if str(err).startswith("Input 0 of layer"): + raise ToolConfigurationError( + "Model input shape does not match the prediction data. " + "This is usually caused by selecting the wrong telescope_id. " + "Please ensure the telescope configuration matches the one used for training." + ) from err + raise # Create a astropy table with the prediction results # The classification task has a softmax layer as the last layer # which returns the probabilities for each class in an array, while @@ -546,7 +821,7 @@ def _predict_with_model(self, model_path): predict_data = vstack([predict_data, predict_data_last_batch]) return predict_data, feature_vectors - def _predict_classification(self, example_identifiers): + def _predict_particletype(self, example_identifiers): """ Predict the classification of the primary particle type. @@ -556,7 +831,7 @@ def _predict_classification(self, example_identifiers): Parameters: ----------- - classification_table : astropy.table.Table + particletype_table : astropy.table.Table Table containing the example identifiers with an additional column for the predicted classification score ('gammaness'). 
feature_vectors : np.ndarray @@ -570,11 +845,11 @@ def _predict_classification(self, example_identifiers): self.load_type_model_from ) # Create prediction table and add the predicted classification score ('gammaness') - classification_table = example_identifiers.copy() - classification_table.add_column( - predict_data["type"].T[1], name=f"{self.prefix}_tel_prediction" + particletype_table = example_identifiers.copy() + particletype_table.add_column( + predict_data["type"].T[1], name=f"{self.prefixes['type']}_tel_prediction" ) - return classification_table, feature_vectors + return particletype_table, feature_vectors def _predict_energy(self, example_identifiers): """ @@ -604,7 +879,9 @@ def _predict_energy(self, example_identifiers): ) # Create prediction table and add the reconstructed energy in TeV energy_table = example_identifiers.copy() - energy_table.add_column(reco_energy, name=f"{self.prefix}_tel_energy") + energy_table.add_column( + reco_energy, name=f"{self.prefixes['energy']}_tel_energy" + ) return energy_table, feature_vectors def _predict_cameradirection(self, example_identifiers): @@ -629,7 +906,7 @@ def _predict_cameradirection(self, example_identifiers): Feature vectors extracted from the backbone model. """ self.log.info( - "Predicting for the regression of the primary particle arrival direction based on camera coordinate offsets..." + "Predicting for the reconstruction of the primary particle arrival direction based on camera coordinate offsets..." ) # Predict the data using the loaded direction_model predict_data, feature_vectors = self._predict_with_model( @@ -667,7 +944,7 @@ def _predict_skydirection(self, example_identifiers): Feature vectors extracted from the backbone model. """ self.log.info( - "Predicting for the regression of the primary particle arrival direction based on spherical coordinate offsets..." + "Predicting for the reconstruction of the primary particle arrival direction based on spherical coordinate offsets..." ) # Predict the data using the loaded direction_model predict_data, feature_vectors = self._predict_with_model( @@ -677,7 +954,7 @@ def _predict_skydirection(self, example_identifiers): # from the telescope pointing. 
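`_predict_skydirection` yields field-of-view offsets (`fov_lon`, `fov_lat`) relative to the telescope pointing, and `_transform_spher_coord_offsets_to_sky` later maps them back to Alt/Az. A hedged sketch of that transform with ctapipe's `NominalFrame`; the location, time and pointing are placeholder values:

```python
# Hedged sketch: interpret predicted (fov_lon, fov_lat) offsets w.r.t. the
# telescope pointing and transform them to AltAz. All values are placeholders.
import astropy.units as u
from astropy.coordinates import AltAz, EarthLocation, SkyCoord
from astropy.time import Time
from ctapipe.coordinates import NominalFrame

altaz = AltAz(
    location=EarthLocation(lon=-17.89 * u.deg, lat=28.76 * u.deg, height=2200 * u.m),
    obstime=Time("2024-01-01T00:00:00"),
)
pointing = SkyCoord(alt=70 * u.deg, az=0 * u.deg, frame=altaz)
nominal = NominalFrame(origin=pointing)

offset = SkyCoord(fov_lon=0.4 * u.deg, fov_lat=-0.2 * u.deg, frame=nominal)
reco = offset.transform_to(altaz)
print(reco.alt.deg, reco.az.deg)
```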
fov_lon = u.Quantity(predict_data["skydirection"].T[0], unit=u.deg) fov_lat = u.Quantity(predict_data["skydirection"].T[1], unit=u.deg) - # Create prediction table and add the reconstructed energy in TeV + # Create prediction table and add the reconstructed fov_lon and fov_lat skydirection_table = example_identifiers.copy() skydirection_table.add_column(fov_lon, name="fov_lon") skydirection_table.add_column(fov_lat, name="fov_lat") @@ -741,8 +1018,14 @@ def _transform_cam_coord_offsets_to_sky(self, table) -> Table: # Transform the true Alt/Az coordinates to camera coordinates reco_direction = cam_coord_offset.transform_to(altaz) # Add the reconstructed direction (az, alt) to the prediction table - table.add_column(reco_direction.az.to(u.deg), name=f"{self.prefix}_tel_az") - table.add_column(reco_direction.alt.to(u.deg), name=f"{self.prefix}_tel_alt") + table.add_column( + reco_direction.az.to(u.deg), + name=f"{self.prefixes['cameradirection']}_tel_az", + ) + table.add_column( + reco_direction.alt.to(u.deg), + name=f"{self.prefixes['cameradirection']}_tel_alt", + ) # Remove unnecessary columns from the table that do not the ctapipe DL2 data format table.remove_columns( [ @@ -805,8 +1088,12 @@ def _transform_spher_coord_offsets_to_sky(self, table) -> Table: # Transform the reco direction from nominal frame to the AltAz frame sky_coord = reco_direction.transform_to(altaz) # Add the reconstructed direction (az, alt) to the prediction table - table.add_column(sky_coord.az.to(u.deg), name=f"{self.prefix}_az") - table.add_column(sky_coord.alt.to(u.deg), name=f"{self.prefix}_alt") + table.add_column( + sky_coord.az.to(u.deg), name=f"{self.prefixes['skydirection']}_az" + ) + table.add_column( + sky_coord.alt.to(u.deg), name=f"{self.prefixes['skydirection']}_alt" + ) # Remove unnecessary columns from the table that do not the ctapipe DL2 data format table.remove_columns( [ @@ -819,64 +1106,6 @@ def _transform_spher_coord_offsets_to_sky(self, table) -> Table: ) return table - def _create_nan_table(self, nonexample_identifiers, columns, shapes): - """ - Create a table with NaNs for missing predictions. - - This method creates a table with NaNs for missing predictions for the non-example identifiers. - In stereo mode, the table also a column for the valid telescopes is added with all False values. - - Parameters: - ----------- - nonexample_identifiers : astropy.table.Table - Table containing the non-example identifiers. - columns : list of str - List of column names to create in the table. - shapes : list of shapes - List of shapes for the columns to create in the table. - - Returns: - -------- - nan_table : astropy.table.Table - Table containing NaNs for missing predictions. - """ - # Create a table with NaNs for missing predictions - nan_table = nonexample_identifiers.copy() - for column_name, shape in zip(columns, shapes): - nan_table.add_column(np.full(shape, np.nan), name=column_name) - # Add that no telescope is valid for the non-example identifiers in stereo mode - if self.dl1dh_reader.mode == "stereo": - nan_table.add_column( - np.zeros( - (len(nonexample_identifiers), len(self.dl1dh_reader.tel_ids)), - dtype=bool, - ), - name=f"{self.prefix}_telescopes", - ) - return nan_table - - def deduplicate_first_valid( - self, - table: Table, - keys=('obs_id', 'event_id'), - valid_col='CTLearn_is_valid', - ): - """ - Return a deduplicated Astropy Table. - - For each group defined by `keys`, keep the first row where - `valid_col` is True. If none are valid, keep the first row. 
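For context, `deduplicate_first_valid` (now defined next to `_create_nan_table` further up) keeps one row per subarray event, preferring valid rows. A toy equivalent that uses `numpy.lexsort` for the mixed ascending/descending sort:

```python
# Toy equivalent of deduplicate_first_valid: per (obs_id, event_id), keep
# the first valid row, falling back to the first row if none are valid.
import numpy as np
from astropy.table import Table, unique

t = Table(
    {
        "obs_id": [1, 1, 1, 1],
        "event_id": [7, 7, 8, 8],
        "CTLearn_is_valid": [False, True, False, False],
        "gammaness": [0.1, 0.9, 0.4, 0.5],
    }
)
# lexsort: the last key is primary, so rows sort by obs_id, event_id, and
# valid-first within each event (~valid puts True rows in front).
order = np.lexsort(
    (~np.asarray(t["CTLearn_is_valid"]), t["event_id"], t["obs_id"])
)
deduped = unique(t[order], keys=["obs_id", "event_id"], keep="first")
print(deduped["gammaness"])  # 0.9 for event 7, 0.4 for event 8
```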
- """ - - t = table.copy() - - t.sort( - list(keys) + [valid_col], - reverse=[False] * len(keys) + [True] - ) - - return unique(t, keys=list(keys), keep='first') - def _store_pointing(self, all_identifiers): """ Store the telescope pointing table from to the output file. @@ -918,13 +1147,12 @@ def _store_pointing(self, all_identifiers): write_table( tel_pointing_table, self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) self.log.info( "DL1 telescope pointing table was stored in '%s' under '%s'", self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) pointing_info = vstack(pointing_info) if self.dl1dh_reader.mode == "stereo": @@ -953,13 +1181,12 @@ def _store_pointing(self, all_identifiers): write_table( pointing_table, self.output_path, - f"{SUBARRAY_POINTING_GROUP}", - overwrite=self.overwrite_tables, + DL1_SUBARRAY_POINTING_GROUP, ) self.log.info( "DL1 subarray pointing table was stored in '%s' under '%s'", self.output_path, - f"{SUBARRAY_POINTING_GROUP}", + DL1_SUBARRAY_POINTING_GROUP, ) return pointing_info @@ -967,7 +1194,7 @@ def _create_feature_vectors_table( self, example_identifiers, nonexample_identifiers=None, - classification_feature_vectors=None, + particletype_feature_vectors=None, energy_feature_vectors=None, direction_feature_vectors=None, ): @@ -984,8 +1211,8 @@ def _create_feature_vectors_table( Table containing the example identifiers. nonexample_identifiers : astropy.table.Table or None Table containing the non-example identifiers to fill the NaNs. - classification_feature_vectors : np.ndarray or None - Array containing the classification feature vectors. + particletype_feature_vectors : np.ndarray or None + Array containing the particletype feature vectors. energy_feature_vectors : np.ndarray or None Array containing the energy feature vectors. 
direction_feature_vectors : np.ndarray or None @@ -999,29 +1226,34 @@ def _create_feature_vectors_table( # Create the feature vector table feature_vector_table = example_identifiers.copy() columns_list, shapes_list = [], [] - if classification_feature_vectors is not None: + if particletype_feature_vectors is not None: is_valid_col = ~np.isnan( - np.min(classification_feature_vectors, axis=1), dtype=bool + np.min(particletype_feature_vectors, axis=1), dtype=bool ) feature_vector_table.add_column( - classification_feature_vectors, - name=f"{self.prefix}_tel_classification_feature_vectors", + particletype_feature_vectors, + name=f"{self.prefixes['all']}_tel_particletype_feature_vectors", ) if nonexample_identifiers is not None: - columns_list.append(f"{self.prefix}_tel_classification_feature_vectors") + columns_list.append( + f"{self.prefixes['all']}_tel_particletype_feature_vectors" + ) shapes_list.append( ( len(nonexample_identifiers), - classification_feature_vectors.shape[1], + particletype_feature_vectors.shape[1], ) ) if energy_feature_vectors is not None: is_valid_col = ~np.isnan(np.min(energy_feature_vectors, axis=1), dtype=bool) feature_vector_table.add_column( - energy_feature_vectors, name=f"{self.prefix}_tel_energy_feature_vectors" + energy_feature_vectors, + name=f"{self.prefixes['all']}_tel_energy_feature_vectors", ) if nonexample_identifiers is not None: - columns_list.append(f"{self.prefix}_tel_energy_feature_vectors") + columns_list.append( + f"{self.prefixes['all']}_tel_energy_feature_vectors" + ) shapes_list.append( ( len(nonexample_identifiers), @@ -1037,10 +1269,12 @@ def _create_feature_vectors_table( ) feature_vector_table.add_column( direction_feature_vectors, - name=f"{self.prefix}_tel_geometry_feature_vectors", + name=f"{self.prefixes['all']}_tel_geometry_feature_vectors", ) if nonexample_identifiers is not None: - columns_list.append(f"{self.prefix}_tel_geometry_feature_vectors") + columns_list.append( + f"{self.prefixes['all']}_tel_geometry_feature_vectors" + ) shapes_list.append( ( len(nonexample_identifiers), @@ -1054,6 +1288,7 @@ def _create_feature_vectors_table( nonexample_identifiers, columns=columns_list, shapes=shapes_list, + reco_task="all", ) feature_vector_table = vstack([feature_vector_table, nan_table]) is_valid_col = np.concatenate( @@ -1062,7 +1297,7 @@ def _create_feature_vectors_table( # Add is_valid column to the feature vector table feature_vector_table.add_column( is_valid_col, - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['all']}_tel_is_valid", ) return feature_vector_table @@ -1108,11 +1343,9 @@ class MonoPredictCTLearnModel(PredictCTLearnModel): --energy_model="/path/to/your/mono/energy/ctlearn_model.cpk" \\ --cameradirection_model="/path/to/your/mono/cameradirection/ctlearn_model.cpk" \\ --dl1-features \\ - --use-HDF5Merger \\ --no-dl1-images \\ --no-true-images \\ --output output.dl2.h5 \\ - --PredictCTLearnModel.overwrite_tables=True \\ To predict from pixel-wise waveform data in mono mode using trained CTLearn models: > ctlearn-predict-mono-model \\ @@ -1123,13 +1356,11 @@ class MonoPredictCTLearnModel(PredictCTLearnModel): --type_model="/path/to/your/mono_waveform/type/ctlearn_model.cpk" \\ --energy_model="/path/to/your/mono_waveform/energy/ctlearn_model.cpk" \\ --cameradirection_model="/path/to/your/mono_waveform/cameradirection/ctlearn_model.cpk" \\ - --use-HDF5Merger \\ --no-r0-waveforms \\ --no-r1-waveforms \\ --no-dl1-images \\ --no-true-images \\ --output output.dl2.h5 \\ - 
--PredictCTLearnModel.overwrite_tables=True \\ """ stereo_combiner_cls = ComponentName( @@ -1142,11 +1373,14 @@ def start(self): self.log.info("Processing the telescope pointings...") # Retrieve the IDs from the dl1dh for the prediction tables example_identifiers = self.dl1dh_reader.example_identifiers.copy() - example_identifiers.keep_columns(TELESCOPE_EVENT_KEYS) - all_identifiers = self.dl1dh_reader.tel_trigger_table.copy() - all_identifiers.keep_columns(TELESCOPE_EVENT_KEYS + ["time"]) + example_identifiers.keep_columns(TEL_EVENT_KEYS) + all_identifiers = read_table( + self.output_path, + DL1_TEL_TRIGGER_TABLE, + ) + all_identifiers.keep_columns(TEL_EVENT_KEYS + ["time"]) nonexample_identifiers = setdiff( - all_identifiers, example_identifiers, keys=TELESCOPE_EVENT_KEYS + all_identifiers, example_identifiers, keys=TEL_EVENT_KEYS ) nonexample_identifiers.remove_column("time") # Pointing table for the mono mode for MC simulation @@ -1158,111 +1392,116 @@ def start(self): pointing_info = super()._store_pointing(all_identifiers) self.log.info("Starting the prediction...") - classification_feature_vectors = None + particletype_feature_vectors = None if self.load_type_model_from is not None: - self.type_stereo_combiner = StereoCombiner.from_name( - self.stereo_combiner_cls, - prefix=self.prefix, - property=ReconstructionProperty.PARTICLE_TYPE, - parent=self, + # Predict the type of the primary particle + particletype_table, particletype_feature_vectors = ( + super()._predict_particletype(example_identifiers) ) - # Predict the energy of the primary particle - classification_table, classification_feature_vectors = ( - super()._predict_classification(example_identifiers) + # Produce output table with NaNs for missing predictions + if len(nonexample_identifiers) > 0: + nan_table = super()._create_nan_table( + nonexample_identifiers, + columns=[f"{self.prefixes['type']}_tel_prediction"], + shapes=[(len(nonexample_identifiers),)], + reco_task="type", + ) + particletype_table = vstack([particletype_table, nan_table]) + # Add is_valid column to the particle type table + particletype_table.add_column( + ~np.isnan( + particletype_table[f"{self.prefixes['type']}_tel_prediction"].data, + dtype=bool, + ), + name=f"{self.prefixes['type']}_tel_is_valid", + ) + # Add the default values and meta data to the table + add_defaults_and_meta( + particletype_table, + ParticleClassificationContainer, + prefix=self.prefixes["type"], + add_tel_prefix=True, ) if self.dl2_telescope: - # Produce output table with NaNs for missing predictions - if len(nonexample_identifiers) > 0: - nan_table = super()._create_nan_table( - nonexample_identifiers, - columns=[f"{self.prefix}_tel_prediction"], - shapes=[(len(nonexample_identifiers),)], - ) - classification_table = vstack([classification_table, nan_table]) - # Add is_valid column to the energy table - classification_table.add_column( - ~np.isnan( - classification_table[f"{self.prefix}_tel_prediction"].data, - dtype=bool, - ), - name=f"{self.prefix}_tel_is_valid", - ) - # Add the default values and meta data to the table - add_defaults_and_meta( - classification_table, - ParticleClassificationContainer, - prefix=self.prefix, - add_tel_prefix=True, - ) for tel_id in self.dl1dh_reader.selected_telescopes[ self.dl1dh_reader.tel_type ]: # Retrieve the example identifiers for the selected telescope - telescope_mask = classification_table["tel_id"] == tel_id - classification_tel_table = classification_table[telescope_mask] - classification_tel_table.sort(TELESCOPE_EVENT_KEYS) + 
telescope_mask = particletype_table["tel_id"] == tel_id + particletype_tel_table = particletype_table[telescope_mask] + particletype_tel_table.sort(TEL_EVENT_KEYS) # Save the prediction to the output file for the selected telescope write_table( - classification_tel_table, + particletype_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefixes['type']}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefixes['type']}/tel_{tel_id:03d}", ) + if self.dl2_subarray: self.log.info("Processing and storing the subarray type prediction...") - # If only one telescope is used, copy the classification table + # If only one telescope is used, copy the particletype table # and modify it to subarray format if len(self.dl1dh_reader.tel_ids) == 1: - classification_subarray_table = classification_table.copy() + particletype_subarray_table = particletype_table.copy() telescope_mask = ( - classification_subarray_table["tel_id"] + particletype_subarray_table["tel_id"] == self.dl1dh_reader.tel_ids[0] ) - classification_subarray_table = classification_subarray_table[ + particletype_subarray_table = particletype_subarray_table[ telescope_mask ] - classification_subarray_table.remove_column("tel_id") - for colname in classification_subarray_table.colnames: + particletype_subarray_table.remove_column("tel_id") + for colname in particletype_subarray_table.colnames: if "_tel_" in colname: - classification_subarray_table.rename_column( + particletype_subarray_table.rename_column( colname, colname.replace("_tel", "") ) - classification_subarray_table.add_column( + particletype_subarray_table.add_column( [ [val] - for val in classification_subarray_table[ - f"{self.prefix}_is_valid" + for val in particletype_subarray_table[ + f"{self.prefixes['type']}_is_valid" ] ], - name=f"{self.prefix}_telescopes", + name=f"{self.prefixes['type']}_telescopes", ) else: + self.type_stereo_combiner = StereoCombiner.from_name( + self.stereo_combiner_cls, + prefix=self.prefixes["type"], + property=ReconstructionProperty.PARTICLE_TYPE, + parent=self, + ) # Combine the telescope predictions to the subarray prediction using the stereo combiner - classification_subarray_table = ( - self.type_stereo_combiner.predict_table(classification_table) + particletype_subarray_table = ( + self.type_stereo_combiner.predict_table(particletype_table) ) # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - classification_subarray_table[f"{self.prefix}_telescopes"].dtype + particletype_subarray_table[ + f"{self.prefixes['type']}_telescopes" + ].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination reco_telescopes = np.zeros( ( - len(classification_subarray_table), + len(particletype_subarray_table), len(self.dl1dh_reader.tel_ids), ), dtype=bool, ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - classification_subarray_table[f"{self.prefix}_telescopes"] + particletype_subarray_table[ + f"{self.prefixes['type']}_telescopes" + ] ): if not tel_id_mask: continue @@ -1273,88 +1512,94 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix 
length - classification_subarray_table[f"{self.prefix}_telescopes"] = ( - reco_telescopes - ) - # Deduplicate the subarray classification table to have only one entry per event - classification_subarray_table = super().deduplicate_first_valid( - table=classification_subarray_table, + particletype_subarray_table[ + f"{self.prefixes['type']}_telescopes" + ] = reco_telescopes + # Deduplicate the subarray particletype table to have only one entry per event + particletype_subarray_table = super().deduplicate_first_valid( + table=particletype_subarray_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", - ) - # Sort the subarray classification table - classification_subarray_table.sort(SUBARRAY_EVENT_KEYS) + valid_col=f"{self.prefixes['type']}_is_valid", + ) + # Sort the subarray particletype table + particletype_subarray_table.sort(SUBARRAY_EVENT_KEYS) # Save the prediction to the output file write_table( - classification_subarray_table, + particletype_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", ) + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['type']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['type']}", + ) energy_feature_vectors = None if self.load_energy_model_from is not None: - self.energy_stereo_combiner = StereoCombiner.from_name( - self.stereo_combiner_cls, - prefix=self.prefix, - property=ReconstructionProperty.ENERGY, - parent=self, - ) # Predict the energy of the primary particle energy_table, energy_feature_vectors = super()._predict_energy( example_identifiers ) - if self.dl2_telescope: - # Produce output table with NaNs for missing predictions - if len(nonexample_identifiers) > 0: - nan_table = super()._create_nan_table( - nonexample_identifiers, - columns=[f"{self.prefix}_tel_energy"], - shapes=[(len(nonexample_identifiers),)], - ) - energy_table = vstack([energy_table, nan_table]) - # Add is_valid column to the energy table - energy_table.add_column( - ~np.isnan( - energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool - ), - name=f"{self.prefix}_tel_is_valid", - ) - # Add the default values and meta data to the table - add_defaults_and_meta( - energy_table, - ReconstructedEnergyContainer, - prefix=self.prefix, - add_tel_prefix=True, + # Produce output table with NaNs for missing predictions + if len(nonexample_identifiers) > 0: + nan_table = super()._create_nan_table( + nonexample_identifiers, + columns=[f"{self.prefixes['energy']}_tel_energy"], + shapes=[(len(nonexample_identifiers),)], + reco_task="energy", ) + energy_table = vstack([energy_table, nan_table]) + # Add is_valid column to the energy table + energy_table.add_column( + ~np.isnan( + energy_table[f"{self.prefixes['energy']}_tel_energy"].data, + dtype=bool, + ), + name=f"{self.prefixes['energy']}_tel_is_valid", + ) + # Add the default values and meta data to the table + add_defaults_and_meta( + energy_table, + ReconstructedEnergyContainer, + 
prefix=self.prefixes["energy"], + add_tel_prefix=True, + ) + if self.dl2_telescope: for tel_id in self.dl1dh_reader.selected_telescopes[ self.dl1dh_reader.tel_type ]: # Retrieve the example identifiers for the selected telescope telescope_mask = energy_table["tel_id"] == tel_id energy_tel_table = energy_table[telescope_mask] - energy_tel_table.sort(TELESCOPE_EVENT_KEYS) + energy_tel_table.sort(TEL_EVENT_KEYS) # Save the prediction to the output file write_table( energy_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL2_TEL_ENERGY_GROUP}/{self.prefixes['energy']}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_ENERGY_GROUP}/{self.prefixes['energy']}/tel_{tel_id:03d}", ) if self.dl2_subarray: self.log.info( "Processing and storing the subarray energy prediction..." ) - # If only one telescope is used, copy the classification table + # If only one telescope is used, copy the particletype table # and modify it to subarray format if len(self.dl1dh_reader.tel_ids) == 1: energy_subarray_table = energy_table.copy() @@ -1371,11 +1616,19 @@ def start(self): energy_subarray_table.add_column( [ [val] - for val in energy_subarray_table[f"{self.prefix}_is_valid"] + for val in energy_subarray_table[ + f"{self.prefixes['energy']}_is_valid" + ] ], - name=f"{self.prefix}_telescopes", + name=f"{self.prefixes['energy']}_telescopes", ) else: + self.energy_stereo_combiner = StereoCombiner.from_name( + self.stereo_combiner_cls, + prefix=self.prefixes["energy"], + property=ReconstructionProperty.ENERGY, + parent=self, + ) # Combine the telescope predictions to the subarray prediction using the stereo combiner energy_subarray_table = self.energy_stereo_combiner.predict_table( energy_table @@ -1383,7 +1636,9 @@ def start(self): # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - energy_subarray_table[f"{self.prefix}_telescopes"].dtype + energy_subarray_table[ + f"{self.prefixes['energy']}_telescopes" + ].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination @@ -1396,7 +1651,9 @@ def start(self): ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - energy_subarray_table[f"{self.prefix}_telescopes"] + energy_subarray_table[ + f"{self.prefixes['energy']}_telescopes" + ] ): if not tel_id_mask: continue @@ -1407,101 +1664,109 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix length - energy_subarray_table[f"{self.prefix}_telescopes"] = ( - reco_telescopes - ) - # Deduplicate the subarray classification table to have only one entry per event + energy_subarray_table[ + f"{self.prefixes['energy']}_telescopes" + ] = reco_telescopes + # Deduplicate the subarray energy table to have only one entry per event energy_subarray_table = super().deduplicate_first_valid( table=energy_subarray_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", - ) + valid_col=f"{self.prefixes['energy']}_is_valid", + ) # Sort the subarray energy table energy_subarray_table.sort(SUBARRAY_EVENT_KEYS) # Save the prediction to the output file write_table( energy_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", - overwrite=self.overwrite_tables, 
+ f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['energy']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['energy']}", + ) direction_feature_vectors = None if self.load_cameradirection_model_from is not None: - self.geometry_stereo_combiner = StereoCombiner.from_name( - self.stereo_combiner_cls, - prefix=self.prefix, - property=ReconstructionProperty.GEOMETRY, - parent=self, - ) # Join the prediction table with the telescope pointing table example_identifiers = join( left=example_identifiers, right=pointing_info, - keys=TELESCOPE_EVENT_KEYS, + keys=TEL_EVENT_KEYS, ) # Predict the arrival direction of the primary particle direction_table, direction_feature_vectors = ( super()._predict_cameradirection(example_identifiers) ) direction_tel_tables = [] - if self.dl2_telescope: - for tel_id in self.dl1dh_reader.selected_telescopes[ - self.dl1dh_reader.tel_type - ]: - # Retrieve the example identifiers for the selected telescope - telescope_mask = direction_table["tel_id"] == tel_id - direction_tel_table = direction_table[telescope_mask] - direction_tel_table = super()._transform_cam_coord_offsets_to_sky( - direction_tel_table - ) - # Produce output table with NaNs for missing predictions - nan_telescope_mask = nonexample_identifiers["tel_id"] == tel_id - nonexample_identifiers_tel = nonexample_identifiers[ - nan_telescope_mask - ] - if len(nonexample_identifiers_tel) > 0: - nan_table = super()._create_nan_table( - nonexample_identifiers_tel, - columns=[f"{self.prefix}_tel_alt", f"{self.prefix}_tel_az"], - shapes=[ - (len(nonexample_identifiers_tel),), - (len(nonexample_identifiers_tel),), - ], - ) - direction_tel_table = vstack([direction_tel_table, nan_table]) - direction_tel_table.sort(TELESCOPE_EVENT_KEYS) - # Add is_valid column to the direction table - direction_tel_table.add_column( - ~np.isnan( - direction_tel_table[f"{self.prefix}_tel_alt"].data, - dtype=bool, - ), - name=f"{self.prefix}_tel_is_valid", - ) - # Add the default values and meta data to the table - add_defaults_and_meta( - direction_tel_table, - ReconstructedGeometryContainer, - prefix=self.prefix, - add_tel_prefix=True, + for tel_id in self.dl1dh_reader.selected_telescopes[ + self.dl1dh_reader.tel_type + ]: + # Retrieve the example identifiers for the selected telescope + telescope_mask = direction_table["tel_id"] == tel_id + direction_tel_table = direction_table[telescope_mask] + direction_tel_table = super()._transform_cam_coord_offsets_to_sky( + direction_tel_table + ) + # Produce output table with NaNs for missing predictions + nan_telescope_mask = nonexample_identifiers["tel_id"] == tel_id + nonexample_identifiers_tel = nonexample_identifiers[nan_telescope_mask] + if len(nonexample_identifiers_tel) > 0: + nan_table = super()._create_nan_table( + nonexample_identifiers_tel, + columns=[ + f"{self.prefixes['cameradirection']}_tel_alt", + f"{self.prefixes['cameradirection']}_tel_az", + ], + shapes=[ + (len(nonexample_identifiers_tel),), + (len(nonexample_identifiers_tel),), + ], + 
reco_task="cameradirection", ) - direction_tel_tables.append(direction_tel_table) + direction_tel_table = vstack([direction_tel_table, nan_table]) + direction_tel_table.sort(TEL_EVENT_KEYS) + # Add is_valid column to the direction table + direction_tel_table.add_column( + ~np.isnan( + direction_tel_table[ + f"{self.prefixes['cameradirection']}_tel_alt" + ].data, + dtype=bool, + ), + name=f"{self.prefixes['cameradirection']}_tel_is_valid", + ) + # Add the default values and meta data to the table + add_defaults_and_meta( + direction_tel_table, + ReconstructedGeometryContainer, + prefix=self.prefixes["cameradirection"], + add_tel_prefix=True, + ) + direction_tel_tables.append(direction_tel_table) + if self.dl2_telescope: # Save the prediction to the output file write_table( direction_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}/tel_{tel_id:03d}", ) if self.dl2_subarray: self.log.info( @@ -1510,7 +1775,7 @@ def start(self): # Stack the telescope tables to the subarray table direction_tel_tables = vstack(direction_tel_tables) # Sort the table by the telescope event keys - direction_tel_tables.sort(TELESCOPE_EVENT_KEYS) + direction_tel_tables.sort(TEL_EVENT_KEYS) # If only one telescope is used, copy the classification table # and modify it to subarray format if len(self.dl1dh_reader.tel_ids) == 1: @@ -1530,12 +1795,18 @@ def start(self): [ [val] for val in direction_subarray_table[ - f"{self.prefix}_is_valid" + f"{self.prefixes['cameradirection']}_is_valid" ] ], - name=f"{self.prefix}_telescopes", + name=f"{self.prefixes['cameradirection']}_telescopes", ) else: + self.geometry_stereo_combiner = StereoCombiner.from_name( + self.stereo_combiner_cls, + prefix=self.prefixes["cameradirection"], + property=ReconstructionProperty.GEOMETRY, + parent=self, + ) # Combine the telescope predictions to the subarray prediction using the stereo combiner direction_subarray_table = ( self.geometry_stereo_combiner.predict_table( @@ -1545,7 +1816,9 @@ def start(self): # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - direction_subarray_table[f"{self.prefix}_telescopes"].dtype + direction_subarray_table[ + f"{self.prefixes['cameradirection']}_telescopes" + ].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination @@ -1558,7 +1831,9 @@ def start(self): ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - direction_subarray_table[f"{self.prefix}_telescopes"] + direction_subarray_table[ + f"{self.prefixes['cameradirection']}_telescopes" + ] ): if not tel_id_mask: continue @@ -1569,60 +1844,70 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix length - direction_subarray_table[f"{self.prefix}_telescopes"] = ( - reco_telescopes - ) - # Deduplicate the subarray classification table to have only one entry per event + direction_subarray_table[ + f"{self.prefixes['cameradirection']}_telescopes" + ] = reco_telescopes + # Deduplicate the subarray direction table to have only one entry per event 
direction_subarray_table = super().deduplicate_first_valid( table=direction_subarray_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", - ) + valid_col=f"{self.prefixes['cameradirection']}_is_valid", + ) # Sort the subarray geometry table direction_subarray_table.sort(SUBARRAY_EVENT_KEYS) # Save the prediction to the output file write_table( direction_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}", ) + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['cameradirection']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['cameradirection']}", + ) # Create the feature vector table if the DL1 features are enabled if self.dl1_features: self.log.info("Processing and storing dl1 feature vectors...") feature_vector_table = super()._create_feature_vectors_table( example_identifiers, nonexample_identifiers, - classification_feature_vectors, + particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors, ) # Loop over the selected telescopes and store the feature vectors # for each telescope in the output file. The feature vectors are stored - # in the DL1_TELESCOPE_GROUP/features/{prefix}/tel_{tel_id:03d} table. + # in the DL1_TEL_GROUP/features/{prefix}/tel_{tel_id:03d} table. 
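Throughout `start()`, triggers without a prediction (non-examples) are padded with NaN rows and flagged through an `is_valid` column, so every event keeps a row in the DL2 and feature tables. A toy sketch of the pattern; the column prefix is illustrative:

```python
# Toy sketch of the NaN-padding + is_valid pattern; "CTLearn_tel_energy"
# stands in for whichever prediction column is being padded.
import numpy as np
from astropy.table import Table, vstack

predicted = Table(
    {"obs_id": [1, 1], "event_id": [7, 8], "CTLearn_tel_energy": [0.5, 2.0]}
)
missing = Table({"obs_id": [1], "event_id": [9]})
missing["CTLearn_tel_energy"] = np.full(len(missing), np.nan)

table = vstack([predicted, missing])
table["CTLearn_tel_is_valid"] = ~np.isnan(table["CTLearn_tel_energy"].data)
table.sort(["obs_id", "event_id"])
print(table)
```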
for tel_id in self.dl1dh_reader.selected_telescopes[ self.dl1dh_reader.tel_type ]: # Retrieve the example identifiers for the selected telescope telescope_mask = feature_vector_table["tel_id"] == tel_id feature_vectors_tel_table = feature_vector_table[telescope_mask] - feature_vectors_tel_table.sort(TELESCOPE_EVENT_KEYS) + feature_vectors_tel_table.sort(TEL_EVENT_KEYS) # Save the prediction to the output file write_table( feature_vectors_tel_table, self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL1_TEL_GROUP}/features/{self.prefixes['all']}/tel_{tel_id:03d}", ) self.log.info( "DL1 feature vectors was stored in '%s' under '%s'", self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", + f"{DL1_TEL_GROUP}/features/{self.prefixes['all']}/tel_{tel_id:03d}", ) def _store_mc_telescope_pointing(self, all_identifiers): @@ -1650,7 +1935,7 @@ def _store_mc_telescope_pointing(self, all_identifiers): keys=["obs_id", "tel_id"], ) # TODO: use keep_order for astropy v7.0.0 - tel_pointing.sort(TELESCOPE_EVENT_KEYS) + tel_pointing.sort(TEL_EVENT_KEYS) # Retrieve the example identifiers for the selected telescope tel_pointing_table = Table( { @@ -1662,13 +1947,12 @@ def _store_mc_telescope_pointing(self, all_identifiers): write_table( tel_pointing_table, self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) self.log.info( "DL1 telescope pointing table was stored in '%s' under '%s'", self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) pointing_info.append(tel_pointing) pointing_info = vstack(pointing_info) @@ -1719,7 +2003,6 @@ class StereoPredictCTLearnModel(PredictCTLearnModel): --energy_model="/path/to/your/stereo/energy/ctlearn_model.cpk" \\ --skydirection_model="/path/to/your/stereo/skydirection/ctlearn_model.cpk" \\ --output output.dl2.h5 \\ - --PredictCTLearnModel.overwrite_tables=True \\ """ def start(self): @@ -1727,8 +2010,13 @@ def start(self): # Retrieve the IDs from the dl1dh for the prediction tables example_identifiers = self.dl1dh_reader.unique_example_identifiers.copy() example_identifiers.keep_columns(SUBARRAY_EVENT_KEYS) - all_identifiers = self.dl1dh_reader.subarray_trigger_table.copy() + all_identifiers = read_table( + self.output_path, + DL1_TEL_TRIGGER_TABLE, + ) all_identifiers.keep_columns(SUBARRAY_EVENT_KEYS + ["time"]) + # Unique example identifiers by events + all_identifiers = unique(all_identifiers, keys=SUBARRAY_EVENT_KEYS) nonexample_identifiers = setdiff( all_identifiers, example_identifiers, keys=SUBARRAY_EVENT_KEYS ) @@ -1745,7 +2033,7 @@ def start(self): survival_telescopes.append(survival_mask) # Add the survival telescopes to the example_identifiers example_identifiers.add_column( - survival_telescopes, name=f"{self.prefix}_telescopes" + survival_telescopes, name=f"{self.prefixes['all']}_telescopes" ) # Pointing table for the stereo mode for MC simulation if self.dl1dh_reader.process_type == ProcessType.Simulation: @@ -1756,61 +2044,77 @@ def start(self): pointing_info = super()._store_pointing(all_identifiers) self.log.info("Starting the prediction...") - classification_feature_vectors = None + particletype_feature_vectors = None if self.load_type_model_from is not None: # Predict the classification of the primary particle - classification_table, classification_feature_vectors = ( - 
super()._predict_classification(example_identifiers) + particletype_table, particletype_feature_vectors = ( + super()._predict_particletype(example_identifiers) ) if self.dl2_subarray: + particletype_table.rename_column( + f"{self.prefixes['all']}_telescopes", + f"{self.prefixes['type']}_telescopes", + ) # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_prediction"], + columns=[f"{self.prefixes['type']}_tel_prediction"], shapes=[(len(nonexample_identifiers),)], + reco_task="type", ) - classification_table = vstack([classification_table, nan_table]) - # Add is_valid column to the classification table - classification_table.add_column( + particletype_table = vstack([particletype_table, nan_table]) + # Add is_valid column to the particletype table + particletype_table.add_column( ~np.isnan( - classification_table[f"{self.prefix}_tel_prediction"].data, + particletype_table[ + f"{self.prefixes['type']}_tel_prediction" + ].data, dtype=bool, ), - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['type']}_is_valid", ) # Rename the columns for the stereo mode - classification_table.rename_column( - f"{self.prefix}_tel_prediction", f"{self.prefix}_prediction" + particletype_table.rename_column( + f"{self.prefixes['type']}_tel_prediction", + f"{self.prefixes['type']}_prediction", ) - classification_table.rename_column( - f"{self.prefix}_tel_is_valid", f"{self.prefix}_is_valid" - ) - # Deduplicate the subarray classification table to have only one entry per event - classification_table = super().deduplicate_first_valid( - table=classification_table, + # Deduplicate the subarray particletype table to have only one entry per event + particletype_table = super().deduplicate_first_valid( + table=particletype_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", + valid_col=f"{self.prefixes['type']}_is_valid", ) - classification_table.sort(SUBARRAY_EVENT_KEYS) + particletype_table.sort(SUBARRAY_EVENT_KEYS) # Add the default values and meta data to the table add_defaults_and_meta( - classification_table, + particletype_table, ParticleClassificationContainer, - prefix=self.prefix, + prefix=self.prefixes["type"], ) # Save the prediction to the output file write_table( - classification_table, + particletype_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", ) + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['type']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['type']}", + ) energy_feature_vectors = None if self.load_energy_model_from is not None: # Predict the energy of the primary particle @@ -1818,53 +2122,68 @@ def start(self): example_identifiers ) if self.dl2_subarray: + energy_table.rename_column( + f"{self.prefixes['all']}_telescopes", + f"{self.prefixes['energy']}_telescopes", + ) # Produce output table with NaNs for missing 
predictions if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_energy"], + columns=[f"{self.prefixes['energy']}_tel_energy"], shapes=[(len(nonexample_identifiers),)], + reco_task="energy", ) energy_table = vstack([energy_table, nan_table]) # Add is_valid column to the energy table energy_table.add_column( ~np.isnan( - energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool + energy_table[f"{self.prefixes['energy']}_tel_energy"].data, + dtype=bool, ), - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['energy']}_is_valid", ) # Rename the columns for the stereo mode energy_table.rename_column( - f"{self.prefix}_tel_energy", f"{self.prefix}_energy" - ) - energy_table.rename_column( - f"{self.prefix}_tel_is_valid", f"{self.prefix}_is_valid" + f"{self.prefixes['energy']}_tel_energy", + f"{self.prefixes['energy']}_energy", ) # Deduplicate the subarray energy table to have only one entry per event energy_table = super().deduplicate_first_valid( table=energy_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", + valid_col=f"{self.prefixes['energy']}_is_valid", ) energy_table.sort(SUBARRAY_EVENT_KEYS) # Add the default values and meta data to the table add_defaults_and_meta( energy_table, ReconstructedEnergyContainer, - prefix=self.prefix, + prefix=self.prefixes["energy"], ) # Save the prediction to the output file write_table( energy_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['energy']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['energy']}", + ) direction_feature_vectors = None if self.load_skydirection_model_from is not None: # Join the prediction table with the telescope pointing table @@ -1878,6 +2197,10 @@ def start(self): example_identifiers ) if self.dl2_subarray: + direction_table.rename_column( + f"{self.prefixes['all']}_telescopes", + f"{self.prefixes['skydirection']}_telescopes", + ) # Transform the spherical coordinate offsets to sky coordinates direction_table = super()._transform_spher_coord_offsets_to_sky( direction_table @@ -1886,85 +2209,102 @@ def start(self): if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_alt", f"{self.prefix}_az"], + columns=[ + f"{self.prefixes['skydirection']}_alt", + f"{self.prefixes['skydirection']}_az", + ], shapes=[ (len(nonexample_identifiers),), (len(nonexample_identifiers),), ], + reco_task="skydirection", ) direction_table = vstack([direction_table, nan_table]) # Add is_valid column to the direction table direction_table.add_column( - ~np.isnan(direction_table[f"{self.prefix}_alt"].data, dtype=bool), - name=f"{self.prefix}_is_valid", + ~np.isnan( + direction_table[f"{self.prefixes['skydirection']}_alt"].data, + dtype=bool, + ), + name=f"{self.prefixes['skydirection']}_is_valid", ) # Deduplicate the subarray direction table 
to have only one entry per event direction_table = super().deduplicate_first_valid( table=direction_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", + valid_col=f"{self.prefixes['skydirection']}_is_valid", ) direction_table.sort(SUBARRAY_EVENT_KEYS) # Add the default values and meta data to the table add_defaults_and_meta( direction_table, ReconstructedGeometryContainer, - prefix=self.prefix, + prefix=self.prefixes["skydirection"], ) # Save the prediction to the output file write_table( direction_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['skydirection']}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['skydirection']}", ) - + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['skydirection']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['skydirection']}", + ) # Create the feature vector table if the DL1 features are enabled if self.dl1_features: self.log.info("Processing and storing dl1 feature vectors...") feature_vector_table = super()._create_feature_vectors_table( example_identifiers, nonexample_identifiers, - classification_feature_vectors, + particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors, ) # Loop over the selected telescopes and store the feature vectors # for each telescope in the output file. The feature vectors are stored - # in the DL1_TELESCOPE_GROUP/features/{prefix}/tel_{tel_id:03d} table. + # in the DL1_TEL_GROUP/features/{self.prefixes['all']}/tel_{tel_id:03d} table. 
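In stereo mode the feature-vector table is subarray-wise, so the `_tel` infix is stripped from the column names before writing, as the explicit `rename_column` calls below show. A generic equivalent of those renames, on illustrative column names:

```python
# Generic equivalent of the explicit rename_column calls below; the column
# names here are illustrative.
from astropy.table import Table

t = Table(
    {
        "CTLearn_tel_energy_feature_vectors": [[0.1, 0.2]],
        "CTLearn_tel_is_valid": [True],
    }
)
for col in list(t.colnames):
    if "_tel_" in col:
        t.rename_column(col, col.replace("_tel", ""))
print(t.colnames)
# ['CTLearn_energy_feature_vectors', 'CTLearn_is_valid']
```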
            # Rename the columns for the stereo mode
            feature_vector_table.rename_column(
-                f"{self.prefix}_tel_classification_feature_vectors",
-                f"{self.prefix}_classification_feature_vectors",
+                f"{self.prefixes['all']}_tel_particletype_feature_vectors",
+                f"{self.prefixes['all']}_particletype_feature_vectors",
            )
            feature_vector_table.rename_column(
-                f"{self.prefix}_tel_energy_feature_vectors",
-                f"{self.prefix}_energy_feature_vectors",
+                f"{self.prefixes['all']}_tel_energy_feature_vectors",
+                f"{self.prefixes['all']}_energy_feature_vectors",
            )
            feature_vector_table.rename_column(
-                f"{self.prefix}_tel_geometry_feature_vectors",
-                f"{self.prefix}_geometry_feature_vectors",
+                f"{self.prefixes['all']}_tel_geometry_feature_vectors",
+                f"{self.prefixes['all']}_geometry_feature_vectors",
            )
            feature_vector_table.rename_column(
-                f"{self.prefix}_tel_is_valid", f"{self.prefix}_is_valid"
+                f"{self.prefixes['all']}_tel_is_valid",
+                f"{self.prefixes['all']}_is_valid",
            )
            feature_vector_table.sort(SUBARRAY_EVENT_KEYS)
            # Save the prediction to the output file
            write_table(
                feature_vector_table,
                self.output_path,
-                f"{DL1_SUBARRAY_GROUP}/features/{self.prefix}",
-                overwrite=self.overwrite_tables,
+                f"{DL1_SUBARRAY_GROUP}/features/{self.prefixes['all']}",
            )
            self.log.info(
                "DL1 feature vectors were stored in '%s' under '%s'",
                self.output_path,
-                f"{DL1_SUBARRAY_GROUP}/features/{self.prefix}",
+                f"{DL1_SUBARRAY_GROUP}/features/{self.prefixes['all']}",
            )

    def _store_mc_subarray_pointing(self, all_identifiers):
@@ -1979,7 +2319,7 @@ def _store_mc_subarray_pointing(self, all_identifiers):
        # Read the subarray pointing table
        pointing_info = read_table(
            self.input_url,
-            f"{SIMULATION_CONFIG_TABLE}",
+            SIMULATION_RUN_TABLE,
        )
        # Assuming min_az = max_az and min_alt = max_alt
        pointing_info.keep_columns(["obs_id", "min_az", "min_alt"])
@@ -2007,13 +2347,12 @@ def _store_mc_subarray_pointing(self, all_identifiers):
        write_table(
            pointing_table,
            self.output_path,
-            f"{SUBARRAY_POINTING_GROUP}",
-            overwrite=self.overwrite_tables,
+            DL1_SUBARRAY_POINTING_GROUP,
        )
        self.log.info(
            "DL1 subarray pointing table was stored in '%s' under '%s'",
            self.output_path,
-            f"{SUBARRAY_POINTING_GROUP}",
+            DL1_SUBARRAY_POINTING_GROUP,
        )

        return pointing_info
diff --git a/ctlearn/tools/tests/test_predict_LST1.py b/ctlearn/tools/tests/test_predict_LST1.py
new file mode 100644
index 00000000..62969979
--- /dev/null
+++ b/ctlearn/tools/tests/test_predict_LST1.py
@@ -0,0 +1,122 @@
+import shutil
+import numpy as np
+import pytest
+
+from ctapipe.core import run_tool
+from ctapipe.io import TableLoader
+from ctlearn.tools import LST1PredictionTool
+
+# Columns that should be present in the output DL2 file
+REQUIRED_COLUMNS = [
+    "event_id",
+    "obs_id",
+    "CTLearnCameraReconstructor_tel_alt",
+    "CTLearnCameraReconstructor_alt",
+    "CTLearnCameraReconstructor_tel_az",
+    "CTLearnCameraReconstructor_az",
+    "CTLearnClassifier_tel_prediction",
+    "CTLearnClassifier_prediction",
+    "CTLearnRegressor_tel_energy",
+    "CTLearnRegressor_energy",
+    "CTLearnCameraReconstructor_tel_is_valid",
+    "CTLearnCameraReconstructor_is_valid",
+    "CTLearnClassifier_tel_is_valid",
+    "CTLearnClassifier_is_valid",
+    "CTLearnRegressor_tel_is_valid",
+    "CTLearnRegressor_is_valid",
+    "CTLearnCameraReconstructor_telescopes",
+    "CTLearnClassifier_telescopes",
+    "CTLearnRegressor_telescopes",
+]
+
+
+@pytest.mark.verifies_usecase("DPPS-UC-130-1.2.2")
+def test_predict_mono_model_with_lst1_mock_data(
+    tmp_path, ctlearn_trained_dl1_mono_models, mock_lst1_dl1_file
+):
+    """
+    Test LST1PredictionTool
using trained mono models and mock LST-1 DL1 files. + Each test run gets its own isolated temp directories. + """ + + model_dir = tmp_path / "trained_models" + model_dir.mkdir(parents=True, exist_ok=True) + + dl2_dir = tmp_path / "dl2_output" + dl2_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy the trained models to the model directory + telescope_type = "LST" + for reco_task in ["type", "energy", "cameradirection"]: + key = f"{telescope_type}_{reco_task}" + shutil.copy( + ctlearn_trained_dl1_mono_models[key], + model_dir / f"ctlearn_mono_model_{key}.keras", + ) + model_file = model_dir / f"ctlearn_mono_model_{key}.keras" + assert model_file.exists(), f"Trained mono model file not found for {key}" + + # Check that the mock LST1 DL1 file was created + assert mock_lst1_dl1_file.exists(), "Mock LST1 DL1 file not found" + + output_file = dl2_dir / "mock_lst1_predictions.dl2.h5" + + # Build command-line arguments for LST1PredictionTool + argv = [ + f"--input_url={mock_lst1_dl1_file}", + f"--output={output_file}", + "--LST1PredictionTool.batch_size=2", + "--LST1PredictionTool.channels=cleaned_image", + "--LST1PredictionTool.channels=cleaned_relative_peak_time", + "--LST1PredictionTool.image_mapper_type=BilinearMapper", + f"--type_model={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras", + f"--energy_model={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras", + f"--cameradirection_model={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras", + "--dl2-telescope", + "--overwrite", + ] + + # Run LST1PredictionTool + assert run_tool(LST1PredictionTool(), argv=argv, cwd=tmp_path) == 0 + + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + + # Check that the created DL2 file can be read with the TableLoader + allowed_tels = [1] + with TableLoader( + output_file, pointing=True, focal_length_choice="EFFECTIVE" + ) as loader: + # Check telescope-wise data + tel_events = loader.read_telescope_events_by_id( + telescopes=allowed_tels, dl1_parameters=True, dl2=True + ) + for tel_id in allowed_tels: + assert len(tel_events[tel_id]) > 0 + for col in REQUIRED_COLUMNS + [ + "tel_id", + "hillas_intensity", + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + ]: + if "_tel_" not in col: + continue + assert ( + col in tel_events[tel_id].colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + tel_events[tel_id][col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" + + # Check subarray-wise data + subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) + assert len(subarray_events) > 0 + for col in REQUIRED_COLUMNS: + if "_tel_" in col: + continue + assert ( + col in subarray_events.colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + subarray_events[col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py new file mode 100644 index 00000000..5f17fc34 --- /dev/null +++ b/ctlearn/tools/tests/test_predict_model.py @@ -0,0 +1,340 @@ +import shutil +import numpy as np +import pytest + +from ctapipe.core import run_tool +from ctapipe.io import TableLoader +from ctlearn.tools import MonoPredictCTLearnModel, StereoPredictCTLearnModel + +# Columns that should be present in the output DL2 file +REQUIRED_COLUMNS = [ + "event_id", + "obs_id", + "CTLearnCameraReconstructor_tel_alt", + 
"CTLearnCameraReconstructor_alt", + "true_alt", + "CTLearnCameraReconstructor_tel_az", + "CTLearnCameraReconstructor_az", + "true_az", + "CTLearnClassifier_tel_prediction", + "CTLearnClassifier_prediction", + "true_shower_primary_id", + "CTLearnRegressor_tel_energy", + "CTLearnRegressor_energy", + "true_energy", + "CTLearnCameraReconstructor_tel_is_valid", + "CTLearnCameraReconstructor_is_valid", + "CTLearnClassifier_tel_is_valid", + "CTLearnClassifier_is_valid", + "CTLearnRegressor_tel_is_valid", + "CTLearnRegressor_is_valid", + "CTLearnCameraReconstructor_telescopes", + "CTLearnClassifier_telescopes", + "CTLearnRegressor_telescopes", + "tels_with_trigger", +] + + +@pytest.mark.verifies_usecase("DPPS-UC-130-1.2") +def test_predict_mono_model_with_r1_waveforms( + tmp_path, ctlearn_trained_r1_mono_models, r1_gamma_file +): + """ + Test training CTLearn mono model using the R1 gamma and proton files for all reconstruction tasks + and predicting DL2 from R1 waveforms. Each test run gets its own isolated temp directories. + """ + + model_dir = tmp_path / "trained_models" + model_dir.mkdir(parents=True, exist_ok=True) + + dl2_dir = tmp_path / "dl2_output" + dl2_dir.mkdir(parents=True, exist_ok=True) + # Define telescope types and their available telescopes + telescope_type = "LST" + available_tels = [1, 2] + + # Hardcopy the trained models to the model directory + for reco_task in ["type", "energy", "cameradirection"]: + key = f"{telescope_type}_{reco_task}" + shutil.copy( + ctlearn_trained_r1_mono_models[key], + model_dir / f"ctlearn_mono_model_{key}.keras", + ) + model_file = model_dir / f"ctlearn_mono_model_{key}.keras" + assert model_file.exists(), f"Trained mono model file not found for {key}" + # Build command-line arguments + argv = [ + f"--input_url={r1_gamma_file}", + "--PredictCTLearnModel.batch_size=2", + "--PredictCTLearnModel.dl1dh_reader_type=DLWaveformReader", + "--DLWaveformReader.sequence_length=5", + "--DLWaveformReader.focal_length_choice=EQUIVALENT", + "--no-r1-waveforms", + "--dl2-telescope", + ] + output_file = dl2_dir / f"gamma_{telescope_type}_mono_from_waveforms.dl2.h5" + # Run Prediction tool + assert ( + run_tool( + MonoPredictCTLearnModel(), + argv=argv + + [ + f"--output={output_file}", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras", + ], + cwd=tmp_path, + ) + == 0 + ) + + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader( + output_file, pointing=True, focal_length_choice="EQUIVALENT" + ) as loader: + # Check telescope-wise data + tel_events = loader.read_telescope_events_by_id( + telescopes=available_tels, dl1_parameters=True, dl2=True + ) + for tel_id in available_tels: + assert len(tel_events[tel_id]) > 0 + for col in REQUIRED_COLUMNS + [ + "tel_id", + "hillas_intensity", + "leakage_pixels_width_2", + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + ]: + assert ( + col in tel_events[tel_id].colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + tel_events[tel_id][col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" + # Check subarray-wise data + subarray_events 
= loader.read_subarray_events(start=0, stop=2, dl2=True) + assert len(subarray_events) > 0 + for col in REQUIRED_COLUMNS: + if "_tel_" in col: + continue + assert ( + col in subarray_events.colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + subarray_events[col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" + + +@pytest.mark.verifies_usecase("DPPS-UC-130-1.2.2") +@pytest.mark.parametrize("dl2_tel_flag", ["dl2-telescope", "no-dl2-telescope"]) +def test_predict_mono_model_with_dl1_images( + tmp_path, ctlearn_trained_dl1_mono_models, dl1_gamma_file, dl2_tel_flag +): + """ + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks + and predicting DL2 from DL1 images. Each test run gets its own isolated temp directories. + """ + + model_dir = tmp_path / "trained_models" + model_dir.mkdir(parents=True, exist_ok=True) + + dl2_dir = tmp_path / "dl2_output" + dl2_dir.mkdir(parents=True, exist_ok=True) + # Define telescope types and their allowed telescopes + telescope_types = { + "LST": [2], + "MST": [7, 13, 15, 16, 17, 19], + # "SST": [30, 37, 43, 44, 53], + } + image_mapper_types = { + "LST": "BilinearMapper", + "MST": "OversamplingMapper", + # "SST": "SquareMapper", + } + # Hardcopy the trained models to the model directory + for telescope_type in telescope_types.keys(): + for reco_task in ["type", "energy", "cameradirection"]: + key = f"{telescope_type}_{reco_task}" + shutil.copy( + ctlearn_trained_dl1_mono_models[key], + model_dir / f"ctlearn_mono_model_{key}.keras", + ) + model_file = model_dir / f"ctlearn_mono_model_{key}.keras" + assert model_file.exists(), f"Trained mono model file not found for {key}" + # Build command-line arguments + argv = [ + f"--input_url={dl1_gamma_file}", + "--PredictCTLearnModel.batch_size=2", + "--DLImageReader.focal_length_choice=EQUIVALENT", + "--no-dl1-images", + "--no-true-images", + f"--{dl2_tel_flag}", + ] + for telescope_type, allowed_tels in telescope_types.items(): + output_file = ( + dl2_dir / f"gamma_{dl2_tel_flag}_{telescope_type}_mono_from_images.dl2.h5" + ) + # Run Prediction tool + assert ( + run_tool( + MonoPredictCTLearnModel(), + argv=argv + + [ + f"--output={output_file}", + f"--DLImageReader.allowed_tels={allowed_tels}", + f"--DLImageReader.image_mapper_type={image_mapper_types[telescope_type]}", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras", + ], + cwd=tmp_path, + ) + == 0 + ) + + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader( + output_file, pointing=True, focal_length_choice="EQUIVALENT" + ) as loader: + # Check telescope-wise data + tel_events = loader.read_telescope_events_by_id( + telescopes=allowed_tels, dl1_parameters=True, dl2=True + ) + for tel_id in allowed_tels: + assert len(tel_events[tel_id]) > 0 + for col in REQUIRED_COLUMNS + [ + "tel_id", + "hillas_intensity", + "leakage_pixels_width_2", + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + ]: + if dl2_tel_flag == "no-dl2-telescope" and "_tel_" in col: + continue + assert ( + col in tel_events[tel_id].colnames + ), f"{col} missing 
in DL2 file {output_file.name}" + assert ( + tel_events[tel_id][col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" + # Check subarray-wise data + subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) + assert len(subarray_events) > 0 + for col in REQUIRED_COLUMNS: + if "_tel_" in col: + continue + assert ( + col in subarray_events.colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + subarray_events[col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" + + +@pytest.mark.verifies_usecase("DPPS-UC-130-1.2.2") +def test_predict_stereo_model_with_dl1_images( + tmp_path, ctlearn_trained_dl1_stereo_models, dl1_gamma_file +): + """ + Test training CTLearn stereo model using the DL1 gamma and proton files for all reconstruction tasks + and predicting DL2 from DL1 images. Each test run gets its own isolated temp directories. + """ + + model_dir = tmp_path / "trained_models" + model_dir.mkdir(parents=True, exist_ok=True) + + dl2_dir = tmp_path / "dl2_output" + dl2_dir.mkdir(parents=True, exist_ok=True) + # Define telescope types and their available telescopes + telescope_type = "MST" + allowed_tels = [7, 13, 15] + + # Hardcopy the trained models to the model directory + for reco_task in ["type", "energy", "skydirection"]: + key = f"{telescope_type}_{reco_task}" + shutil.copy( + ctlearn_trained_dl1_stereo_models[key], + model_dir / f"ctlearn_stereo_model_{key}.keras", + ) + model_file = model_dir / f"ctlearn_stereo_model_{key}.keras" + assert model_file.exists(), f"Trained stereo model file not found for {key}" + # Build command-line arguments + argv = [ + f"--input_url={dl1_gamma_file}", + "--PredictCTLearnModel.batch_size=2", + "--PredictCTLearnModel.stack_telescope_images=True", + "--DLImageReader.mode=stereo", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}", + "--no-dl1-images", + "--no-true-images", + ] + output_file = dl2_dir / f"gamma_{telescope_type}_stereo_from_images.dl2.h5" + # Run Prediction tool + assert ( + run_tool( + StereoPredictCTLearnModel(), + argv=argv + + [ + f"--output={output_file}", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_skydirection_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_skydirection.keras", + ], + cwd=tmp_path, + ) + == 0 + ) + + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader( + output_file, pointing=True, focal_length_choice="EQUIVALENT" + ) as loader: + # Check telescope-wise data + tel_events = loader.read_telescope_events_by_id( + telescopes=allowed_tels, dl1_parameters=True, dl2=True + ) + for tel_id in allowed_tels: + assert len(tel_events[tel_id]) > 0 + for col in REQUIRED_COLUMNS + [ + "tel_id", + "hillas_intensity", + "leakage_pixels_width_2", + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + ]: + if "_tel_" in col: + continue + if "CTLearnCameraReconstructor" in col: + col = col.replace( + "CTLearnCameraReconstructor", "CTLearnSkyReconstructor" + ) + assert ( + col in tel_events[tel_id].colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + tel_events[tel_id][col][0] is not np.nan + ), f"{col} has NaN 
values in DL2 file {output_file.name}" + # Check subarray-wise data + subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) + assert len(subarray_events) > 0 + for col in REQUIRED_COLUMNS: + if "_tel_" in col: + continue + if "CTLearnCameraReconstructor" in col: + col = col.replace( + "CTLearnCameraReconstructor", "CTLearnSkyReconstructor" + ) + assert ( + col in subarray_events.colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + subarray_events[col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py new file mode 100644 index 00000000..d2291652 --- /dev/null +++ b/ctlearn/tools/tests/test_train_model.py @@ -0,0 +1,78 @@ +import pandas as pd +import pytest +import shutil + +from ctapipe.core import run_tool +from ctlearn.tools import TrainCTLearnModel + + +@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection"]) +def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_path): + """ + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. + """ + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_dl1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_dl1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy DL1 gamma file to the signal directory + shutil.copy(dl1_gamma_file, signal_dir) + # Hardcopy DL1 proton file to the background directory + shutil.copy(dl1_proton_file, background_dir) + + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{reco_task}" + allowed_tels = [7, 13, 15, 16, 17, 19] + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=2", + "--TrainCTLearnModel.batch_size=4", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend( + [ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + ] + ) + + # Run training + assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 + + # --- Additional checks --- + # Check that the trained model exists + model_file = output_dir / "ctlearn_model.keras" + assert model_file.exists(), f"Trained model file not found for {reco_task}" + # Check training_log.csv exists + log_file = output_dir / "training_log.csv" + assert log_file.exists(), f"Training log file not found for {reco_task}" + # Read CSV and verify number of epochs + log_df = pd.read_csv(log_file) + num_epochs_logged = log_df.shape[0] + assert ( + num_epochs_logged == 2 + ), f"Expected two epochs, found {num_epochs_logged} for {reco_task}" + # Check that val_loss column exists + assert ( + "val_loss" in log_df.columns + ), f"'val_loss' column missing in training_log.csv for {reco_task}" + val_loss = log_df["val_loss"].dropna() + assert not val_loss.empty, f"'val_loss' column is empty for {reco_task}" + assert ((val_loss >= 0.0) & (val_loss <= 1.0)).all(), ( + f"'val_loss' values out of range [0.0, 1.0] for {reco_task}: " + f"{val_loss.tolist()}" + ) diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py index 
2ce4ced5..e4f729d2 100644 --- a/ctlearn/tools/train_model.py +++ b/ctlearn/tools/train_model.py @@ -6,7 +6,6 @@ import keras import pandas as pd import numpy as np -import shutil import tensorflow as tf from ctapipe.core import Tool @@ -24,6 +23,7 @@ Unicode, ) from dl1_data_handler.reader import DLDataReader +from ctlearn import __version__ as ctlearn_version from ctlearn.core.loader import DLDataLoader from ctlearn.core.model import CTLearnModel from ctlearn.utils import validate_trait_dict @@ -114,9 +114,9 @@ class TrainCTLearnModel(Tool): help="List of specific file pattern for matching files in ``input_dir_background``", ).tag(config=True) - dl1dh_reader_type = ComponentName( - DLDataReader, default_value="DLImageReader" - ).tag(config=True) + dl1dh_reader_type = ComponentName(DLDataReader, default_value="DLImageReader").tag( + config=True + ) stack_telescope_images = Bool( default_value=False, @@ -136,9 +136,7 @@ class TrainCTLearnModel(Tool): ), ).tag(config=True) - model_type = ComponentName( - CTLearnModel, default_value="ResNet" - ).tag(config=True) + model_type = ComponentName(CTLearnModel, default_value="ResNet").tag(config=True) output_dir = Path( exits=False, @@ -151,14 +149,14 @@ class TrainCTLearnModel(Tool): reco_tasks = List( trait=CaselessStrEnum(["type", "energy", "cameradirection", "skydirection"]), - allow_none=False, + allow_none=False, help=( "List of reconstruction tasks to perform. " - "'type': classification of the primary particle type " - "'energy': regression of the primary particle energy " - "'cameradirection': regression of the primary particle arrival direction in camera coordinates " - "'skydirection': regression of the primary particle arrival direction in sky coordinates" - ) + "'type': classification of the primary particle type; " + "'energy': regression of the primary particle energy; " + "'cameradirection': reconstruction of the primary particle arrival direction in camera coordinates; " + "'skydirection': reconstruction of the primary particle arrival direction in sky coordinates." + ), ).tag(config=True) n_epochs = Int( @@ -187,20 +185,29 @@ class TrainCTLearnModel(Tool): ).tag(config=True) optimizer = Dict( - default_value={"name": "Adam", "base_learning_rate": 0.0001, "adam_epsilon": 1.0e-8}, - help=( - "Optimizer to use for training. " - "E.g. {'name': 'Adam', 'base_learning_rate': 0.0001, 'adam_epsilon': 1.0e-8}. " - ) + default_value={ + "name": "Adam", + "base_learning_rate": 0.0001, + "adam_epsilon": 1.0e-8, + }, + help=( + "Optimizer to use for training. " + "E.g. {'name': 'Adam', 'base_learning_rate': 0.0001, 'adam_epsilon': 1.0e-8}. " + ), ).tag(config=True) lr_reducing = Dict( - default_value={"factor": 0.5, "patience": 5, "min_delta": 0.01, "min_lr": 0.000001}, + default_value={ + "factor": 0.5, + "patience": 5, + "min_delta": 0.01, + "min_lr": 0.000001, + }, allow_none=True, - help=( - "Learning rate reducing parameters for the Keras callback. " - "E.g. {'factor': 0.5, 'patience': 5, 'min_delta': 0.01, 'min_lr': 0.000001}. " - ) + help=( + "Learning rate reducing parameters for the Keras callback. " + "E.g. {'factor': 0.5, 'patience': 5, 'min_delta': 0.01, 'min_lr': 0.000001}. " + ), ).tag(config=True) random_seed = Int( @@ -209,7 +216,7 @@ class TrainCTLearnModel(Tool): "Random seed for shuffling the data " "before the training/validation split " "and after the end of an epoch." 
-        )
+        ),
    ).tag(config=True)

    save_onnx = Bool(
@@ -217,17 +224,16 @@ class TrainCTLearnModel(Tool):
        allow_none=False,
        help="Set whether to save model in an ONNX file.",
    ).tag(config=True)
-
+
    early_stopping = Dict(
        default_value=None,
        allow_none=True,
-        help=(
-            "Early stopping parameters for the Keras callback. "
-            "E.g. {'monitor': 'val_loss', 'patience': 4, 'verbose': 1, 'restore_best_weights': True}. "
-        )
+        help=(
+            "Early stopping parameters for the Keras callback. "
+            "E.g. {'monitor': 'val_loss', 'patience': 4, 'verbose': 1, 'restore_best_weights': True}. "
+        ),
    ).tag(config=True)
-
    aliases = {
        "signal": "TrainCTLearnModel.input_dir_signal",
        "background": "TrainCTLearnModel.input_dir_background",
@@ -240,6 +246,7 @@ class TrainCTLearnModel(Tool):
    classes = classes_with_traits(CTLearnModel) + classes_with_traits(DLDataReader)

    def setup(self):
+        self.log.info("ctlearn version %s", ctlearn_version)
        # Check if the output directory exists
        if self.output_dir.exists():
            raise ToolConfigurationError(
@@ -257,7 +264,9 @@ def setup(self):
        self.input_url_background = []
        if self.input_dir_background is not None:
            for background_pattern in self.file_pattern_background:
-                self.input_url_background.extend(self.input_dir_background.glob(background_pattern))
+                self.input_url_background.extend(
+                    self.input_dir_background.glob(background_pattern)
+                )

        # Set up the data reader
        self.log.info("Loading data:")
@@ -275,8 +284,12 @@ def setup(self):
        )
        self.log.info("Number of events loaded: %s", self.dl1dh_reader._get_n_events())
        if "type" in self.reco_tasks:
-            self.log.info("Number of signal events: %d", self.dl1dh_reader.n_signal_events)
-            self.log.info("Number of background events: %d", self.dl1dh_reader.n_bkg_events)
+            self.log.info(
+                "Number of signal events: %d", self.dl1dh_reader.n_signal_events
+            )
+            self.log.info(
+                "Number of background events: %d", self.dl1dh_reader.n_bkg_events
+            )
        # Check if the number of events is enough to form a batch
        if self.dl1dh_reader._get_n_events() < self.batch_size:
            raise ValueError(
@@ -294,7 +307,10 @@ def setup(self):
                f"Cannot stack telescope images in mono mode. Use stereo mode for stacking."
            )
        # Check if only one telescope type is selected for stacking telescope images
-        if self.stack_telescope_images and len(list(self.dl1dh_reader.selected_telescopes)) > 1:
+        if (
+            self.stack_telescope_images
+            and len(list(self.dl1dh_reader.selected_telescopes)) > 1
+        ):
            raise ToolConfigurationError(
                f"Cannot stack telescope images from multiple telescope types. Use only one telescope type."
) @@ -309,14 +325,16 @@ def setup(self): # Shuffle the indices before the training/validation split np.random.seed(self.random_seed) np.random.shuffle(indices) - n_validation_examples = int(self.validation_split * self.dl1dh_reader._get_n_events()) + n_validation_examples = int( + self.validation_split * self.dl1dh_reader._get_n_events() + ) training_indices = indices[n_validation_examples:] validation_indices = indices[:n_validation_examples] self.training_loader = DLDataLoader( self.dl1dh_reader, training_indices, tasks=self.reco_tasks, - batch_size=self.batch_size*self.strategy.num_replicas_in_sync, + batch_size=self.batch_size * self.strategy.num_replicas_in_sync, random_seed=self.random_seed, sort_by_intensity=self.sort_by_intensity, stack_telescope_images=self.stack_telescope_images, @@ -325,7 +343,7 @@ def setup(self): self.dl1dh_reader, validation_indices, tasks=self.reco_tasks, - batch_size=self.batch_size*self.strategy.num_replicas_in_sync, + batch_size=self.batch_size * self.strategy.num_replicas_in_sync, random_seed=self.random_seed, sort_by_intensity=self.sort_by_intensity, stack_telescope_images=self.stack_telescope_images, @@ -335,11 +353,7 @@ def setup(self): monitor = "val_loss" monitor_mode = "min" # Model checkpoint callback - # Temp fix for supporting keras2 & keras3 - if int(keras.__version__.split(".")[0]) >= 3: - model_path = f"{self.output_dir}/ctlearn_model.keras" - else: - model_path = f"{self.output_dir}/ctlearn_model.cpk" + model_path = f"{self.output_dir}/ctlearn_model.keras" model_checkpoint_callback = keras.callbacks.ModelCheckpoint( filepath=model_path, monitor=monitor, @@ -355,23 +369,32 @@ def setup(self): csv_logger_callback = keras.callbacks.CSVLogger( filename=f"{self.output_dir}/training_log.csv", append=True ) - self.callbacks = [model_checkpoint_callback, tensorboard_callback, csv_logger_callback] - + self.callbacks = [ + model_checkpoint_callback, + tensorboard_callback, + csv_logger_callback, + ] + if self.early_stopping is not None: # EarlyStopping callback - validate_trait_dict(self.early_stopping, ["monitor", "patience", "verbose", "restore_best_weights"]) + validate_trait_dict( + self.early_stopping, + ["monitor", "patience", "verbose", "restore_best_weights"], + ) early_stopping_callback = keras.callbacks.EarlyStopping( - monitor=self.early_stopping["monitor"], - patience=self.early_stopping["patience"], - verbose=self.early_stopping["verbose"], - restore_best_weights=self.early_stopping["restore_best_weights"] + monitor=self.early_stopping["monitor"], + patience=self.early_stopping["patience"], + verbose=self.early_stopping["verbose"], + restore_best_weights=self.early_stopping["restore_best_weights"], ) self.callbacks.append(early_stopping_callback) # Learning rate reducing callback if self.lr_reducing is not None: # Validate the learning rate reducing parameters - validate_trait_dict(self.lr_reducing, ["factor", "patience", "min_delta", "min_lr"]) + validate_trait_dict( + self.lr_reducing, ["factor", "patience", "min_delta", "min_lr"] + ) lr_reducing_callback = keras.callbacks.ReduceLROnPlateau( monitor=monitor, factor=self.lr_reducing["factor"], @@ -383,10 +406,8 @@ def setup(self): ) self.callbacks.append(lr_reducing_callback) - - def start(self): - + # Open a strategy scope. 
        with self.strategy.scope():

            # Construct the model
@@ -400,7 +421,7 @@ def start(self):
            # Validate the optimizer parameters
            validate_trait_dict(self.optimizer, ["name", "base_learning_rate"])
            # Set the learning rate for the optimizer
-            learning_rate = self.optimizer["base_learning_rate"] 
+            learning_rate = self.optimizer["base_learning_rate"]
            # Set the epsilon for the Adam optimizer
            adam_epsilon = None
            if self.optimizer["name"] == "Adam":
@@ -419,7 +440,10 @@ def start(self):
                    keras.optimizers.Adam,
                    dict(learning_rate=learning_rate, epsilon=adam_epsilon),
                ),
-                "RMSProp": (keras.optimizers.RMSprop, dict(learning_rate=learning_rate)),
+                "RMSProp": (
+                    keras.optimizers.RMSprop,
+                    dict(learning_rate=learning_rate),
+                ),
                "SGD": (keras.optimizers.SGD, dict(learning_rate=learning_rate)),
            }
            # Get the optimizer function and arguments
@@ -428,7 +452,9 @@ def start(self):
            losses, metrics = self._get_losses_and_mertics(self.reco_tasks)
            # Compile the model
            self.log.info("Compiling CTLearn model.")
-            self.model.compile(optimizer=optimizer_fn(**optimizer_args), loss=losses, metrics=metrics)
+            self.model.compile(
+                optimizer=optimizer_fn(**optimizer_args), loss=losses, metrics=metrics
+            )

        # Train and evaluate the model
        self.log.info("Training and evaluating...")
@@ -442,7 +468,6 @@ def start(self):
        )
        self.log.info("Training and evaluating finished successfully!")

-
    def finish(self):

        # Saving model weights in onnx format
@@ -456,7 +481,9 @@ def finish(self):
            output_path = f"{self.output_dir}/ctlearn_model.onnx"

            tf2onnx.convert.from_keras(
-                self.model, input_signature=self.model.input_layer.input._type_spec, output_path=output_path
+                self.model,
+                input_signature=self.model.input_layer.input._type_spec,
+                output_path=output_path,
            )
            self.log.info("ONNX model saved in %s", self.output_dir)
@@ -505,12 +532,16 @@ def _get_losses_and_mertics(self, tasks):
            losses["cameradirection"] = keras.losses.MeanAbsoluteError(
                reduction="sum_over_batch_size"
            )
-            metrics["cameradirection"] = keras.metrics.MeanAbsoluteError(name="mae_cameradirection")
+            metrics["cameradirection"] = keras.metrics.MeanAbsoluteError(
+                name="mae_cameradirection"
+            )
        if "skydirection" in self.reco_tasks:
            losses["skydirection"] = keras.losses.MeanAbsoluteError(
                reduction="sum_over_batch_size"
            )
-            metrics["skydirection"] = keras.metrics.MeanAbsoluteError(name="mae_skydirection")
+            metrics["skydirection"] = keras.metrics.MeanAbsoluteError(
+                name="mae_skydirection"
+            )

        return losses, metrics
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 055ec254..a4da1b33 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -11,63 +11,47 @@
 The following command will set up a conda virtual environment, add the
 necessary package channels, and install the specified CTLearn version and its dependencies:

 .. code-block:: bash

+    mamba create -n ctlearn -c conda-forge python==3.12 llvmlite
+    conda activate ctlearn
+    pip install ctlearn

-    CTLEARN_VER=0.10.3
-    wget https://raw.githubusercontent.com/ctlearn-project/ctlearn/v$CTLEARN_VER/environment.yml
-    conda env create -n [ENVIRONMENT_NAME] -f environment.yml
-    conda activate [ENVIRONMENT_NAME]
-    pip install ctlearn==$CTLEARN_VER
-    ctlearn -h
+For working on the IT-cluster:
+
+.. code-block:: bash
+
+    mamba create -n ctlearn-it-cluster -c conda-forge python==3.12 h5py scipy llvmlite gcc_linux-64 gxx_linux-64 openblas gfortran_linux-64
+    conda activate ctlearn-it-cluster
+    export CC=$(which x86_64-conda-linux-gnu-gcc)
+    export CXX=$(which x86_64-conda-linux-gnu-g++)
+    export FC=$(which x86_64-conda-linux-gnu-gfortran)
+    pip install ctlearn

-This should automatically install all dependencies (NOTE: this may take some time, as by default MKL is included as a dependency of NumPy and it is very large).
+Do not forget to extend your ``LD_LIBRARY_PATH`` with the necessary paths. For example, add the following line to your ``.bashrc``:
+
+.. code-block:: bash
+
+    export LD_LIBRARY_PATH=/to/your/.conda/envs/ctlearn-it-cluster/lib:/fefs/aswg/workspace/tjark.miener/cudnn-linux-x86_64-8.9.7.29_cuda12-archive/lib:$LD_LIBRARY_PATH
+
+Note: Replace ``/to/your/.conda/envs/ctlearn-it-cluster/lib`` with the ``lib`` directory of the conda environment where CTLearn is installed; ``cudnn-linux-x86_64-8.9.7.29_cuda12-archive`` is the path to the cuDNN libraries for CUDA 12.

-For working on the IT-cluster, please do not forget to update your LD_LIBRARY_PATH to include the necessary paths. For example, you can add the following line to your .bashrc file:
-export LD_LIBRARY_PATH=/to/your/.conda/envs/ctlearn/lib:/fefs/aswg/workspace/tjark.miener/cudnn-linux-x86_64-8.9.2.26_cuda11-archive/lib:/fefs/aswg/workspace/tjark.miener/cudnn-linux-x86_64-8.9.7.29_cuda12-archive/lib:$LD_LIBRARY_PATH
-Note: You would need to replace the /to/your/.conda/envs/ctlearn/lib with the path to your conda environment where ctlearn is installed. cudnn-linux-x86_64-8.9.2.26_cuda11-archive and cudnn-linux-x86_64-8.9.7.29_cuda12-archive are the paths to the cuDNN libraries for CUDA 11 and CUDA 12, respectively.

 Installing with pip/setuptools from source for development
 ----------------------------------------------------------

-Clone the CTLearn repository:
+First, set up conda/mamba as described above and create a new conda environment that includes all the dependencies for CTLearn. Then clone the CTLearn repository and install CTLearn into the new conda environment with pip from source:

 .. code-block:: bash

+    mamba create -n ctlearn -c conda-forge python==3.12 llvmlite
+    conda activate ctlearn
     cd
     git clone https://github.com/ctlearn-project/ctlearn.git
+    pip install -e .

-First, install Anaconda by following the instructions above. Create a new conda environment that includes all the dependencies for CTLearn:
-
-.. code-block:: bash
-
-    conda env create -f /ctlearn/environment.yml
-
-Finally, install CTLearn into the new conda environment via pypi:
-
-.. code-block:: bash
-
-    conda activate ctlearn
-    pip install ctlearn==0.10.3
-
-or with pip from source:
-
-.. code-block:: bash
-
-    conda activate ctlearn
-
-    cd /ctlearn
-    pip install .
-
-NOTE for developers: If you wish to fork/clone the repository and edit the code, install with ``pip install -e .``.
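+As a quick sanity check that the installation succeeded (a minimal sketch; it only assumes the ``ctlearn`` package is importable and exposes ``__version__``):
+
+.. code-block:: bash
+
+    python -c "import ctlearn; print(ctlearn.__version__)"
+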
Dependencies ------------ -* Python>=3.10 -* TensorFlow>=2.14,<2.15 -* ctapipe>=0.22.0,<0.26 +* Python>=3.12 +* TensorFlow>=2.16 +* ctapipe>=0.29.0 * ctaplot -* DL1DataHandler>=0.14.5,<0.15 +* DL1DataHandler>=0.14.8 * numba * NumPy * Pandas diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 20a49207..00000000 --- a/environment.yml +++ /dev/null @@ -1,23 +0,0 @@ -# conda env create -f environment.yml -name: ctlearn -channels: - - anaconda - - conda-forge -dependencies: - - python=3.10 - - astropy - - setuptools - - numpy - - pandas - - pip - - pytables - - tables - - c-blosc2=2.13 - - pyyaml - - scikit-learn - - ctapipe - - pip: - - numba - - tensorflow>=2.14,<2.15 - - dl1_data_handler>=0.14.5,<0.15 - - pydot diff --git a/pyproject.toml b/pyproject.toml index 434bd27a..bab81d9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,16 +20,15 @@ classifiers = [ "Topic :: Scientific/Engineering :: Astronomy", "Topic :: Scientific/Engineering :: Physics", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] -requires-python = ">=3.10" +requires-python = ">=3.12" dependencies = [ - "dl1_data_handler>=0.14.5, <0.15", + "dl1_data_handler>=0.14.8", "astropy", "numpy", "pandas", @@ -37,10 +36,10 @@ dependencies = [ "pyyaml", "scikit-learn", "numba", - "tensorflow>=2.14,<2.15", + "tensorflow>=2.16", "pydot", "setuptools", - "ctapipe>=0.22.0, <0.26", + "ctapipe[all]>=0.29", ] dynamic = ["version"] @@ -71,6 +70,9 @@ version_file = "ctlearn/_version.py" [tool.pytest.ini_options] testpaths = ["ctlearn"] +markers = [ + "verifies_usecase(usecase_id): mark test as verifying a specific use case", +] norecursedirs = [ ".git",