From be15ea5de1e2d6327aacbfd5c8290fad2125e3db Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Sat, 13 Dec 2025 17:05:32 +0100
Subject: [PATCH 01/27] upgrade some versions

---
 .github/workflows/python-package-conda.yml | 2 +-
 environment.yml                            | 4 ++--
 pyproject.toml                             | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index c2c8c558..ac776e41 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-22.04]
-        pyv: [ '3.10','3.11', '3.12', '3.13']
+        pyv: [ '3.12', '3.13', '3.14']
         max-parallel: 5
     runs-on: ${{ matrix.os }}
     steps:
diff --git a/environment.yml b/environment.yml
index 20a49207..6deb7e57 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,7 +4,7 @@ channels:
   - anaconda
   - conda-forge
 dependencies:
-  - python=3.10
+  - python=3.12
   - astropy
   - setuptools
   - numpy
@@ -18,6 +18,6 @@ dependencies:
   - ctapipe
   - pip:
     - numba
-    - tensorflow>=2.14,<2.15
+    - tensorflow>=2.19,<2.21
     - dl1_data_handler>=0.14.5,<0.15
     - pydot
diff --git a/pyproject.toml b/pyproject.toml
index 434bd27a..fbff1985 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,7 +27,7 @@ classifiers = [
 ]


-requires-python = ">=3.10"
+requires-python = ">=3.12"
 dependencies = [
     "dl1_data_handler>=0.14.5, <0.15",
     "astropy",
@@ -37,10 +37,10 @@ dependencies = [
     "pyyaml",
     "scikit-learn",
     "numba",
-    "tensorflow>=2.14,<2.15",
+    "tensorflow>=2.19,<2.21",
     "pydot",
     "setuptools",
-    "ctapipe>=0.22.0, <0.26",
+    "ctapipe>=0.28.0",
 ]

 dynamic = ["version"]

From f040131681eb1de11edc85dcd39d120f467845d2 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Mon, 15 Dec 2025 13:43:10 +0100
Subject: [PATCH 02/27] polish CI

---
 .github/workflows/python-package-conda.yml | 91 +++++++++++++---------
 environment.yml                            | 23 ------
 pyproject.toml                             |  8 +-
 3 files changed, 59 insertions(+), 63 deletions(-)
 delete mode 100644 environment.yml

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index ac776e41..48426baa 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -1,4 +1,3 @@
-
 name: CI

 on:
@@ -15,39 +14,61 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-22.04]
-        pyv: [ '3.12', '3.13', '3.14']
+        python-version: ['3.12', '3.13', '3.14']
+        ctlearn-version: ['latest', 'nightly']
-        max-parallel: 5
+        max-parallel: 6
     runs-on: ${{ matrix.os }}
+    continue-on-error: ${{ matrix.ctlearn-version == 'nightly' || matrix.python-version == '3.14' }}
     steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python ${{ matrix.pyv }}
-        run: |
-          # Install Miniconda
-          wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
-          bash miniconda.sh -b -p $HOME/miniconda
-          echo "$HOME/miniconda/bin" >> $GITHUB_PATH
-          source $HOME/miniconda/bin/activate
-          # Install Mamba via conda (since we don't have mamba yet)
-          $HOME/miniconda/bin/conda config --add channels conda-forge
-          $HOME/miniconda/bin/conda install -y mamba=2.0.8
-          mamba install -y python=${{ matrix.pyv }}
-      - name: Add MKL_THREADING_LAYER variable
-        run: echo "MKL_THREADING_LAYER=GNU" >> $GITHUB_ENV
-      - name: Install dependencies with Mamba
-        run: |
-          source $HOME/miniconda/bin/activate
-          mamba env update --file environment.yml --name base
-      - name: Lint with flake8
-        run: |
-          source $HOME/miniconda/bin/activate
-          mamba install flake8
-          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-      - name: Install with pip
-        run: |
-          pip install -e .
-      - name: Test with pytest
-        run: |
-          source $HOME/miniconda/bin/activate
-          mamba install pytest
-          pytest
+      - uses: actions/checkout@v4
+
+      - name: Set up Miniconda
+        run: |
+          wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
+          bash miniconda.sh -b -p $HOME/miniconda
+          echo "$HOME/miniconda/bin" >> $GITHUB_PATH
+          source $HOME/miniconda/etc/profile.d/conda.sh
+          conda config --add channels conda-forge
+          conda install -y mamba
+
+      - name: Create Python environment
+        run: |
+          source $HOME/miniconda/etc/profile.d/conda.sh
+          mamba create -y -n ctlearn python==${{ matrix.python-version }} -c conda-forge
+          mamba activate ctlearn
+
+      - name: Install dependencies
+        run: |
+          source $HOME/miniconda/etc/profile.d/conda.sh
+          mamba activate ctlearn
+          sudo apt-get update
+          sudo apt-get install -y git
+          pip install --upgrade pip
+          pip install pylint pylint-exit anybadge eventio pytest flake8
+          if [ "${{ matrix.ctlearn-version }}" = "nightly" ]; then
+            pip install git+https://github.com/cta-observatory/dl1-data-handler.git
+          else
+            pip install dl1-data-handler
+          fi
+
+      - name: Add MKL_THREADING_LAYER variable
+        run: echo "MKL_THREADING_LAYER=GNU" >> $GITHUB_ENV
+
+      - name: Lint with flake8
+        run: |
+          source $HOME/miniconda/etc/profile.d/conda.sh
+          mamba activate ctlearn
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+      - name: Install package with pip
+        run: |
+          source $HOME/miniconda/etc/profile.d/conda.sh
+          mamba activate ctlearn
+          pip install -e .
+
+      - name: Run pytest
+        run: |
+          source $HOME/miniconda/etc/profile.d/conda.sh
+          mamba activate ctlearn
+          pytest
diff --git a/environment.yml b/environment.yml
deleted file mode 100644
index 6deb7e57..00000000
--- a/environment.yml
+++ /dev/null
@@ -1,23 +0,0 @@
-# conda env create -f environment.yml
-name: ctlearn
-channels:
-  - anaconda
-  - conda-forge
-dependencies:
-  - python=3.12
-  - astropy
-  - setuptools
-  - numpy
-  - pandas
-  - pip
-  - pytables
-  - tables
-  - c-blosc2=2.13
-  - pyyaml
-  - scikit-learn
-  - ctapipe
-  - pip:
-    - numba
-    - tensorflow>=2.19,<2.21
-    - dl1_data_handler>=0.14.5,<0.15
-    - pydot
diff --git a/pyproject.toml b/pyproject.toml
index fbff1985..1ae3afa1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,8 +20,6 @@ classifiers = [
     "Topic :: Scientific/Engineering :: Astronomy",
     "Topic :: Scientific/Engineering :: Physics",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
-    "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",

@@ -29,7 +27,7 @@
 requires-python = ">=3.12"
 dependencies = [
-    "dl1_data_handler>=0.14.5, <0.15",
+    "dl1_data_handler>=0.14.5",
     "astropy",
     "numpy",
     "pandas",
@@ -37,10 +35,10 @@ dependencies = [
     "pyyaml",
     "scikit-learn",
     "numba",
-    "tensorflow>=2.19,<2.21",
+    "tensorflow>=2.20",
     "pydot",
     "setuptools",
-    "ctapipe>=0.28.0",
+    "ctapipe[all]>=0.28",
 ]

 dynamic = ["version"]

From 1946d079f2755484ee7b2baef42a40a751105db0 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Mon, 15 Dec 2025 13:47:14 +0100
Subject: [PATCH 03/27] run shell hook after mamba install in CI

---
 .github/workflows/python-package-conda.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index 48426baa..8a16954a 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -30,6 +30,7 @@ jobs:
           source $HOME/miniconda/etc/profile.d/conda.sh
           conda config --add channels conda-forge
           conda install -y mamba
+          eval "$(mamba shell hook --shell bash)"

       - name: Create Python environment
         run: |

From 319b2277820f7500556f8b956ed48e4fa6793c92 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Mon, 15 Dec 2025 13:50:47 +0100
Subject: [PATCH 04/27] use conda to activate the envs

---
 .github/workflows/python-package-conda.yml | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index 8a16954a..3d37b982 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -30,18 +30,17 @@ jobs:
           source $HOME/miniconda/etc/profile.d/conda.sh
           conda config --add channels conda-forge
           conda install -y mamba
-          eval "$(mamba shell hook --shell bash)"

       - name: Create Python environment
         run: |
           source $HOME/miniconda/etc/profile.d/conda.sh
           mamba create -y -n ctlearn python==${{ matrix.python-version }} -c conda-forge
-          mamba activate ctlearn
+          conda activate ctlearn

       - name: Install dependencies
         run: |
           source $HOME/miniconda/etc/profile.d/conda.sh
-          mamba activate ctlearn
+          conda activate ctlearn
           sudo apt-get update
           sudo apt-get install -y git
@@ -57,18 +57,18 @@ jobs:
       - name: Add MKL_THREADING_LAYER variable
         run: echo "MKL_THREADING_LAYER=GNU" >> $GITHUB_ENV

       - name: Lint with flake8
         run: |
           source $HOME/miniconda/etc/profile.d/conda.sh
-          mamba activate ctlearn
+          conda activate ctlearn
           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

       - name: Install package with pip
         run: |
           source $HOME/miniconda/etc/profile.d/conda.sh
-          mamba activate ctlearn
+          conda activate ctlearn
           pip install -e .

       - name: Run pytest
         run: |
           source $HOME/miniconda/etc/profile.d/conda.sh
-          mamba activate ctlearn
+          conda activate ctlearn
           pytest

From 895d5757c5d52901d61b4d0fe42f438b338394cf Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Mon, 15 Dec 2025 16:08:12 +0100
Subject: [PATCH 05/27] fixes some CI

---
 .github/workflows/python-package-conda.yml | 6 +++---
 pyproject.toml                             | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/python-package-conda.yml b/.github/workflows/python-package-conda.yml
index 3d37b982..da00acbf 100644
--- a/.github/workflows/python-package-conda.yml
+++ b/.github/workflows/python-package-conda.yml
@@ -15,10 +15,10 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-22.04]
         python-version: ['3.12', '3.13', '3.14']
-        ctlearn-version: ['latest', 'nightly']
+        dl1dh-version: ['latest', 'nightly']
         max-parallel: 6
     runs-on: ${{ matrix.os }}
-    continue-on-error: ${{ matrix.ctlearn-version == 'nightly' || matrix.python-version == '3.14' }}
+    continue-on-error: ${{ matrix.dl1dh-version == 'nightly' || matrix.python-version == '3.14' }}
     steps:
       - uses: actions/checkout@v4
@@ -45,7 +45,7 @@ jobs:
           pip install pylint pylint-exit anybadge eventio pytest flake8
-          if [ "${{ matrix.ctlearn-version }}" = "nightly" ]; then
+          if [ "${{ matrix.dl1dh-version }}" = "nightly" ]; then
             pip install git+https://github.com/cta-observatory/dl1-data-handler.git
           else
             pip install dl1-data-handler
diff --git a/pyproject.toml b/pyproject.toml
index 1ae3afa1..c361c71f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -22,6 +22,7 @@ classifiers = [
     "Programming Language :: Python :: 3",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",

 ]
@@ -36,7 +37,7 @@ dependencies = [
     "pyyaml",
     "scikit-learn",
     "numba",
-    "tensorflow>=2.20",
+    "tensorflow>=2.16",
     "pydot",
     "setuptools",
     "ctapipe[all]>=0.28",

From 79102c77f1ec245a4bd4e27fb062f226440ac9 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Mon, 15 Dec 2025 16:16:45 +0100
Subject: [PATCH 06/27] remove support for keras2

---
 ctlearn/tools/predict_LST1.py  |  9 +++------
 ctlearn/tools/predict_model.py | 18 +++++++++---------
 ctlearn/tools/train_model.py   |  6 +-----
 3 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/ctlearn/tools/predict_LST1.py b/ctlearn/tools/predict_LST1.py
index e27e68c6..a4751d9a 100644
--- a/ctlearn/tools/predict_LST1.py
+++ b/ctlearn/tools/predict_LST1.py
@@ -120,7 +120,7 @@ class LST1PredictionTool(Tool):
     load_type_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) "
+            "Path to a Keras model file (Keras3) "
             "for the classification of the primary particle type."
         ),
         allow_none=True,
@@ -132,7 +132,7 @@ class LST1PredictionTool(Tool):
     load_energy_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) "
+            "Path to a Keras model file (Keras3) "
             "for the regression of the primary particle energy."
         ),
         allow_none=True,
@@ -144,7 +144,7 @@ class LST1PredictionTool(Tool):
     load_cameradirection_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) "
+            "Path to a Keras model file (Keras3) "
            "for the regression of the primary particle arrival direction "
            "based on the camera coordinate offsets."
         ),
@@ -514,9 +514,6 @@ def start(self):
                 image = get_unmapped_image(event, self.channels, self.transforms)
                 data.append(self.image_mapper.map_image(image))
             input_data = {"input": np.array(data)}
-            # Temp fix for supporting keras2 & keras3
-            if int(keras.__version__.split(".")[0]) >= 3:
-                input_data = input_data["input"]

             event_id.extend(dl1_table["event_id"].data)
             tel_azimuth.extend(dl1_table["tel_az"].data)
diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py
index 0eaa8b23..ea86c811 100644
--- a/ctlearn/tools/predict_model.py
+++ b/ctlearn/tools/predict_model.py
@@ -107,14 +107,14 @@ class PredictCTLearnModel(Tool):
     prefix : str
         Name of the reconstruction algorithm used to generate the dl2 data.
     load_type_model_from : pathlib.Path
-        Path to a Keras model file (Keras3) or directory (Keras2) for the classification of the primary particle type.
+        Path to a Keras model file (Keras3) for the classification of the primary particle type.
     load_energy_model_from : pathlib.Path
-        Path to a Keras model file (Keras3) or directory (Keras2) for the regression of the primary particle energy.
+        Path to a Keras model file (Keras3) for the regression of the primary particle energy.
     load_cameradirection_model_from : pathlib.Path
-        Path to a Keras model file (Keras3) or directory (Keras2) for the regression
+        Path to a Keras model file (Keras3) for the regression
         of the primary particle arrival direction based on camera coordinate offsets.
     load_skydirection_model_from : pathlib.Path
-        Path to a Keras model file (Keras3) or directory (Keras2) for the regression
+        Path to a Keras model file (Keras3) for the regression
         of the primary particle arrival direction based on spherical coordinate offsets.
     output_path : pathlib.Path
         Output path to save the dl2 prediction results.
@@ -228,7 +228,7 @@ class PredictCTLearnModel(Tool):
     load_type_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) for the classification "
+            "Path to a Keras model file (Keras3) for the classification "
            "of the primary particle type."
         ),
         allow_none=True,
@@ -240,7 +240,7 @@ class PredictCTLearnModel(Tool):
     load_energy_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) for the regression "
+            "Path to a Keras model file (Keras3) for the regression "
            "of the primary particle energy."
         ),
         allow_none=True,
@@ -252,7 +252,7 @@ class PredictCTLearnModel(Tool):
     load_cameradirection_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) for the regression "
+            "Path to a Keras model file (Keras3) for the regression "
            "of the primary particle arrival direction based on camera coordinate offsets."
         ),
         allow_none=True,
@@ -264,7 +264,7 @@ class PredictCTLearnModel(Tool):
     load_skydirection_model_from = Path(
         default_value=None,
         help=(
-            "Path to a Keras model file (Keras3) or directory (Keras2) for the regression "
+            "Path to a Keras model file (Keras3) for the regression "
            "of the primary particle arrival direction based on spherical coordinate offsets."
         ),
         allow_none=True,
@@ -435,7 +435,7 @@ def _predict_with_model(self, model_path):
         Parameters
         ----------
         model_path : str
-            Path to a Keras model file (Keras3) or directory (Keras2).
+            Path to a Keras model file (Keras3).

         Returns
         -------
diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py
index 2ce4ced5..4c440350 100644
--- a/ctlearn/tools/train_model.py
+++ b/ctlearn/tools/train_model.py
@@ -335,11 +335,7 @@ def setup(self):
             monitor = "val_loss"
             monitor_mode = "min"
         # Model checkpoint callback
-        # Temp fix for supporting keras2 & keras3
-        if int(keras.__version__.split(".")[0]) >= 3:
-            model_path = f"{self.output_dir}/ctlearn_model.keras"
-        else:
-            model_path = f"{self.output_dir}/ctlearn_model.cpk"
+        model_path = f"{self.output_dir}/ctlearn_model.keras"
         model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
             filepath=model_path,
             monitor=monitor,

From 226eaf500af5af7a322b78dd8091a287e448643b Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Mon, 15 Dec 2025 16:23:13 +0100
Subject: [PATCH 07/27] remove support for keras2 also in loader

---
 ctlearn/core/loader.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/ctlearn/core/loader.py b/ctlearn/core/loader.py
index 5b723f9c..f018f677 100644
--- a/ctlearn/core/loader.py
+++ b/ctlearn/core/loader.py
@@ -186,9 +186,6 @@ def _get_mono_item(self, batch):
                 ),
                 axis=1,
             )
-        # Temp fix for supporting keras2 & keras3
-        if int(keras.__version__.split(".")[0]) >= 3:
-            features = features["input"]
         return features, labels

     def _get_stereo_item(self, batch):
@@ -309,7 +306,4 @@ def _get_stereo_item(self, batch):
             features = {"input": np.array(mono_feature_vectors)}
         if "stereo_feature_vectors" in batch.colnames:
             features = {"input": np.array(stereo_feature_vectors)}
-        # Temp fix for supporting keras2 & keras3
-        if int(keras.__version__.split(".")[0]) >= 3:
-            features = features["input"]
         return features, labels

From 0d1955d8df2c10d771f00418081a2cc504c45b42 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Mon, 15 Dec 2025 16:33:20 +0100
Subject: [PATCH 08/27] update installation instructions

---
 README.rst | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/README.rst b/README.rst
index 81556e43..fa922416 100644
--- a/README.rst
+++ b/README.rst
@@ -28,22 +28,22 @@ CTLearn is a package under active development to run deep learning models to ana
 Installation for users
 ----------------------

-Download and install `Anaconda <https://www.anaconda.com/download/>`_\ , or, for a minimal installation, `Miniconda <https://conda.io/miniconda.html>`_.
-The following command will set up a conda virtual environment, add the
-necessary package channels, and install CTLearn specified version and its dependencies:
+Installation
+------------
+
+First, create and activate a fresh conda environment:

 .. code-block:: bash

-   CTLEARN_VER=0.10.3
-   wget https://raw.githubusercontent.com/ctlearn-project/ctlearn/v$CTLEARN_VER/environment.yml
-   conda env create -n [ENVIRONMENT_NAME] -f environment.yml
-   conda activate [ENVIRONMENT_NAME]
-   pip install ctlearn==$CTLEARN_VER
-   ctlearn -h
+   mamba create -n ctlearn -c conda-forge python==3.12
+   mamba activate ctlearn
+
+The latest version of this package can be installed via pip::
+

-This should automatically install all dependencies (NOTE: this may take some time, as by default MKL is included as a dependency of NumPy and it is very large).
+   pip install ctlearn

 See the documentation for further information like `installation instructions for developers <https://ctlearn.readthedocs.io/en/latest/installation.html>`_, `package usage <https://ctlearn.readthedocs.io/en/latest/usage.html>`_, and `dependencies <https://ctlearn.readthedocs.io/en/latest/installation.html>`_ among other topics.
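A minimal post-install smoke test matching the instructions above (a sketch, not part of the patched README; the ``importlib.metadata`` lookup works for any installed distribution, and the Keras check reflects the Keras 3 requirement this series moves to):

.. code-block:: python

   from importlib.metadata import version

   import keras  # pulled in via the tensorflow dependency

   print("ctlearn", version("ctlearn"))  # distribution version from package metadata
   print("keras", keras.__version__)     # expected to report a 3.x version here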
From e78e19c7d98e4528246c70d033daf75850230638 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Mon, 15 Dec 2025 16:56:21 +0100
Subject: [PATCH 09/27] remove conda deploy workflow and polish pypi one

---
 .github/conda/bld.bat                 |  2 -
 .github/conda/build.sh                |  2 -
 .github/conda/conda_build_config.yaml |  7 ---
 .github/conda/meta.yaml               | 56 ------------------
 .github/workflows/release.yml         | 81 ++++++++-------------------
 5 files changed, 23 insertions(+), 125 deletions(-)
 delete mode 100644 .github/conda/bld.bat
 delete mode 100644 .github/conda/build.sh
 delete mode 100644 .github/conda/conda_build_config.yaml
 delete mode 100644 .github/conda/meta.yaml

diff --git a/.github/conda/bld.bat b/.github/conda/bld.bat
deleted file mode 100644
index 89481145..00000000
--- a/.github/conda/bld.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-$PYTHON setup.py install --single-version-externally-managed --record=record.txt
diff --git a/.github/conda/build.sh b/.github/conda/build.sh
deleted file mode 100644
index 89481145..00000000
--- a/.github/conda/build.sh
+++ /dev/null
@@ -1,2 +0,0 @@
-#!/bin/bash
-$PYTHON setup.py install --single-version-externally-managed --record=record.txt
diff --git a/.github/conda/conda_build_config.yaml b/.github/conda/conda_build_config.yaml
deleted file mode 100644
index 49777cb0..00000000
--- a/.github/conda/conda_build_config.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-python:
-  - 3.13
-  - 3.12
-  - 3.11
-  - 3.10
-
-
diff --git a/.github/conda/meta.yaml b/.github/conda/meta.yaml
deleted file mode 100644
index b78f32b9..00000000
--- a/.github/conda/meta.yaml
+++ /dev/null
@@ -1,56 +0,0 @@
-{% set data = load_setup_py_data() %}
-
-package:
-  name: ctlearn
-  version: {{ data.get('version') }}
-source:
-  path: ../..
-
-build:
-  #noarch: generic
-  number: 0
-
-requirements:
-  build:
-    - python #==3.12
-    - numpy >=1.20
-    - setuptools
-    - astropy
-    - scipy
-    - jupyter
-    - ctapipe ==0.20.0
-    - pytables >=3.8
-    - pandas
-  host:
-    - python #==3.12
-    - numpy >=1.20
-    - astropy
-    - setuptools
-    - scipy
-    - jupyter
-    - ctapipe ==0.20.0
-    - pytables >=3.8
-    - pandas
-  run:
-    - python #==3.12
-    - numpy >=1.20
-    - jupyter
-    - setuptools
-    - astropy
-    - scipy
-    - ctapipe ==0.20.0
-    - pytables >=3.8
-    - pandas
-
-  test:
-    imports:
-      - ctlearn
-about:
-  home: https://github.com/ctlearn-project/ctlearn/
-  license: BSD3-Clause
-  license_file: LICENSE
-  summary: Deep Learning for IACT Event Reconstruction.
-extra:
-  recipe-maintainers:
-    - TjarkMiener
-    - nietootein
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 646613b1..51a7d068 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -2,13 +2,14 @@ name: Release CD

 on:
   release:
-    types: [published]
+    types: [published]
   workflow_dispatch:
+
 jobs:
   pypi-publish:
     name: Publish release to PyPI
     environment:
-      name: pypi
+      name: pypi
       url: https://pypi.org/project/ctlearn/
     permissions:
       id-token: write
@@ -20,64 +21,28 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-22.04]
-        pyv: ['3.10']
+        pyv: ['3.12']
         max-parallel: 5
     runs-on: ${{ matrix.os }}
+
     steps:
-      - uses: actions/checkout@v4
-      - name: Set up Python ${{ matrix.pyv }}
-        run: |
-          conda install -y python=${{ matrix.pyv }}
-
-      - name: Add conda to system path
-        run: |
-          #$CONDA is an environment variable pointing to the root of the miniconda directory
-          echo $CONDA/bin >> $GITHUB_PATH
-
-      - name: Install dependencies
-        run: |
-          conda env update --file environment.yml --name base
-
-      - name: Build package
-        run: |
-          python --version
-          pip install -U build
-          python -m build
-      - name: Publish package distributions to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
-
-  condapublish:
-    strategy:
-      matrix:
-        os: [ubuntu-22.04]
-        pyv: ["3.10"]
-        max-parallel: 5
-    runs-on: ${{ matrix.os }}
-    permissions:
-      id-token: write
-      contents: write
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
-      - name: Set up Python ${{ matrix.pyv }}
-        run: |
-          conda install -y python=${{ matrix.pyv }}
-
-      - name: Add conda to system path
-        run: |
-          # $CONDA is an environment variable pointing to the root of the miniconda directory
-          echo $CONDA/bin >> $GITHUB_PATH
-
-      - name: Install dependencies
-        run: |
-          conda env update --file environment.yml --name base
-          sudo apt-get install python3-numpy
-
-      - name: publish-to-conda
-        uses: fcakyon/conda-publish-action@v1.3
-        with:
-          subdir: '.github/conda'
-          anacondatoken: ${{ secrets.ANACONDA_TOKEN }}
+      - uses: actions/checkout@v4

+      - name: Set up micromamba (Python ${{ matrix.pyv }})
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-name: ctlearn
+          create-args: >-
+            python=${{ matrix.pyv }}
+            pip
+          cache-environment: true

+      - name: Build package
+        shell: micromamba-shell {0}
+        run: |
+          python --version
+          pip install -U build
+          python -m build
+
+      - name: Publish package distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
\ No newline at end of file

From 22b6f6cd37d9f311ce1aebdfe6468ba0d19be772 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Tue, 16 Dec 2025 14:15:02 +0100
Subject: [PATCH 10/27] test building of PyPI package on PRs when workflows are
 touched, but only upload on release events

---
 .github/workflows/release.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 51a7d068..984895f4 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -4,6 +4,9 @@ on:
   release:
     types: [published]
   workflow_dispatch:
+  pull_request:
+    paths:
+      - '.github/workflows/**'

 jobs:
   pypi-publish:
@@ -45,4 +48,5 @@ jobs:
           python -m build

       - name: Publish package distributions to PyPI
+        if: github.event_name == 'release'
         uses: pypa/gh-action-pypi-publish@release/v1
\ No newline at end of file

From ae421657f0531091dc44ca87fae80d2816ddf237 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Wed, 17 Dec 2025 10:00:14 +0100
Subject: [PATCH 11/27] bump python to v3.12 also for the docs

---
 .readthedocs.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 9d5aecee..71fb5123 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -6,7 +6,7 @@ sphinx:
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.10"
+    python: "3.12"

 python:
   install:

From 608ce714c18a597013114c5df24bda526453311c Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Wed, 17 Dec 2025 14:53:49 +0100
Subject: [PATCH 12/27] update installation instructions

---
 README.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.rst b/README.rst
index fa922416..f69dbb25 100644
--- a/README.rst
+++ b/README.rst
@@ -36,7 +36,7 @@ First, create and activate a fresh conda environment:
 .. code-block:: bash

-   mamba create -n ctlearn -c conda-forge python==3.12
+   mamba create -n ctlearn -c conda-forge python==3.12 llvmlite
    mamba activate ctlearn

 The latest version of this package can be installed via pip::

From 84f549c0ab2df1210f8f38c75bf81b5e06f996c2 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Wed, 17 Dec 2025 14:54:35 +0100
Subject: [PATCH 13/27] fix some keras3 updates

---
 ctlearn/conftest.py               | 18 ++++++++----------
 ctlearn/core/loader.py            |  8 ++++----
 ctlearn/core/model.py             |  6 +++---
 ctlearn/core/tests/test_loader.py |  6 +++---
 ctlearn/tools/__init__.py         | 11 +++++++++++
 ctlearn/tools/predict_model.py    |  6 ------
 6 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py
index d912ae18..dd838034 100644
--- a/ctlearn/conftest.py
+++ b/ctlearn/conftest.py
@@ -1,16 +1,14 @@
 """
 common pytest fixtures for tests in ctlearn.
-Credits to ctapipe for the original code.
 """

 import pytest
-
 from ctapipe.core import run_tool
 from ctapipe.utils import get_dataset_path

 @pytest.fixture(scope="session")
-def prod5_gamma_simtel_path():
-    return get_dataset_path("gamma_prod5.simtel.zst")
+def gamma_simtel_path():
+    return get_dataset_path("gamma_test_large.simtel.gz")

 @pytest.fixture(scope="session")
 def dl1_tmp_path(tmp_path_factory):
     """Temporary directory for global dl1 test data"""
     return tmp_path_factory.mktemp("dl1_")

 @pytest.fixture(scope="session")
 def r1_tmp_path(tmp_path_factory):
     return tmp_path_factory.mktemp("r1_")

 @pytest.fixture(scope="session")
-def dl1_gamma_file(dl1_tmp_path, prod5_gamma_simtel_path):
+def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path):
     """
     DL1 file containing both images and parameters from a gamma simulation set.
     """
     from ctapipe.tools.process import ProcessorTool

     output = dl1_tmp_path / "gamma.dl1.h5"
     argv = [
-        f"--input={prod5_gamma_simtel_path}",
+        f"--input={gamma_simtel_path}",
         f"--output={output}",
         "--write-images",
-        "--DataWriter.Contact.name=αℓℓ the äüöß",
+        "--SimTelEventSource.focal_length_choice=EQUIVALENT",
     ]
     assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0
     return output

 @pytest.fixture(scope="session")
-def r1_gamma_file(r1_tmp_path, prod5_gamma_simtel_path):
+def r1_gamma_file(r1_tmp_path, gamma_simtel_path):
     """
     R1 file containing both waveforms and parameters from a gamma simulation set.
""" @@ -50,10 +48,10 @@ def r1_gamma_file(r1_tmp_path, prod5_gamma_simtel_path): output = r1_tmp_path / "gamma.r1.h5" argv = [ - f"--input={prod5_gamma_simtel_path}", + f"--input={gamma_simtel_path}", f"--output={output}", f"--DataWriter.write_r1_waveforms=True", - "--DataWriter.Contact.name=αℓℓ the äüöß", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", ] assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 return output \ No newline at end of file diff --git a/ctlearn/core/loader.py b/ctlearn/core/loader.py index f018f677..92fd96c2 100644 --- a/ctlearn/core/loader.py +++ b/ctlearn/core/loader.py @@ -155,7 +155,7 @@ def _get_mono_item(self, batch): """ # Retrieve the telescope images and store in the features dictionary labels = {} - features = {"input": batch["features"].data} + features = batch["features"].data if "type" in self.tasks: labels["type"] = to_categorical( batch["true_shower_primary_class"].data, @@ -300,10 +300,10 @@ def _get_stereo_item(self, batch): ) # Store the fatures in the features dictionary if "features" in batch.colnames: - features = {"input": np.array(features)} + features = np.array(features) # TDOO: Add support for both feature vectors if "mono_feature_vectors" in batch.colnames: - features = {"input": np.array(mono_feature_vectors)} + features = np.array(mono_feature_vectors) if "stereo_feature_vectors" in batch.colnames: - features = {"input": np.array(stereo_feature_vectors)} + features = np.array(stereo_feature_vectors) return features, labels diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py index 07c79d46..bfac48c2 100644 --- a/ctlearn/core/model.py +++ b/ctlearn/core/model.py @@ -288,7 +288,7 @@ def _build_backbone(self, input_shape): """ # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Get model arcihtecture parameters for the backbone filters_list = [layer["filters"] for layer in self.architecture] kernel_sizes = [layer["kernel_size"] for layer in self.architecture] @@ -472,7 +472,7 @@ def _build_backbone(self, input_shape): Keras input layer object for the backbone model. 
""" # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Apply initial padding if specified if self.init_padding > 0: network_input = keras.layers.ZeroPadding2D( @@ -880,7 +880,7 @@ def _build_backbone(self, input_shape): """ # Define the input layer from the input shape - network_input = keras.Input(shape=input_shape, name="input") + network_input = keras.Input(shape=input_shape) # Set the backbone model to be trainable or not for layer in self.model.layers: if layer.name.endswith("_block"): diff --git a/ctlearn/core/tests/test_loader.py b/ctlearn/core/tests/test_loader.py index 97f87172..7fe71900 100644 --- a/ctlearn/core/tests/test_loader.py +++ b/ctlearn/core/tests/test_loader.py @@ -1,17 +1,17 @@ -import pytest from traitlets.config.loader import Config from dl1_data_handler.reader import DLImageReader from ctlearn.core.loader import DLDataLoader -def test_data_loader(dl1_tmp_path, dl1_gamma_file): +def test_data_loader(dl1_gamma_file): """check""" # Create a configuration suitable for the test config = Config( { "DLImageReader": { "allowed_tels": [4], + "focal_length_choice": "EQUIVALENT", }, } ) @@ -34,4 +34,4 @@ def test_data_loader(dl1_tmp_path, dl1_gamma_file): and "skydirection" in labels ) # Check the shape of the features - assert features["input"].shape == (1, 110, 110, 2) + assert features.shape == (1, 110, 110, 2) diff --git a/ctlearn/tools/__init__.py b/ctlearn/tools/__init__.py index 996469a1..d54df32d 100644 --- a/ctlearn/tools/__init__.py +++ b/ctlearn/tools/__init__.py @@ -1,2 +1,13 @@ """ctlearn command line tools. """ + +from .train_model import TrainCTLearnModel +from .predict_LST1 import LST1PredictionTool +from .predict_model import MonoPredictCTLearnModel, StereoPredictCTLearnModel + +__all__ = [ + "TrainCTLearnModel", + "LST1PredictionTool", + "MonoPredictCTLearnModel", + "StereoPredictCTLearnModel" +] \ No newline at end of file diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py index ea86c811..3d55b150 100644 --- a/ctlearn/tools/predict_model.py +++ b/ctlearn/tools/predict_model.py @@ -65,12 +65,6 @@ SUBARRAY_EVENT_KEYS = ["obs_id", "event_id"] TELESCOPE_EVENT_KEYS = ["obs_id", "event_id", "tel_id"] -__all__ = [ - "PredictCTLearnModel", - "MonoPredictCTLearnModel", - "StereoPredictCTLearnModel", -] - class PredictCTLearnModel(Tool): """ From 3a3faa07b6bc5a25c7083ab637b8d6cabf0f5fc4 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Wed, 17 Dec 2025 14:55:25 +0100 Subject: [PATCH 14/27] add first tests for TrainCTLearnModel --- ctlearn/tools/tests/test_train_model.py | 60 +++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 ctlearn/tools/tests/test_train_model.py diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py new file mode 100644 index 00000000..5005cd60 --- /dev/null +++ b/ctlearn/tools/tests/test_train_model.py @@ -0,0 +1,60 @@ +import pandas as pd +import pytest +import shutil + +from ctapipe.core import run_tool +from ctlearn.tools import TrainCTLearnModel + +#@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection", "skydirection"]) +@pytest.mark.parametrize("reco_task", ["energy", "cameradirection", "skydirection"]) +def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): + """ + Test training CTLearn model using a temporary copy of the DL1 gamma file. + Each test run gets its own isolated temp directories. 
+ """ + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_dl1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_dl1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy DL1 gamma file to the signal directory + shutil.copy(dl1_gamma_file, signal_dir) + + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=2", + "--TrainCTLearnModel.batch_size=4", + "--DLImageReader.focal_length_choice=EQUIVALENT", + "--DLImageReader.allowed_tel_types=LST_LST_LSTCam", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend([ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5" + ]) + + # Run training + assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 + + # --- Additional checks --- + # Check that the trained model exists + model_file = output_dir / "ctlearn_model.keras" + assert model_file.exists(), f"Trained model file not found for {reco_task}" + # Check training_log.csv exists + log_file = output_dir / "training_log.csv" + assert log_file.exists(), f"Training log file not found for {reco_task}" + # Read CSV and verify number of epochs + log_df = pd.read_csv(log_file) + num_epochs_logged = log_df.shape[0] + assert num_epochs_logged == 2, f"Expected two epochs, found {num_epochs_logged} for {reco_task}" \ No newline at end of file From b4d226941ad573ae34c13bfc0a8342143da6fdf4 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Wed, 17 Dec 2025 17:11:43 +0100 Subject: [PATCH 15/27] added test for the 'type' reco task --- ctlearn/conftest.py | 23 +++++++++++++++++ ctlearn/tools/tests/test_train_model.py | 34 ++++++++++++++++++++----- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index dd838034..8349f73f 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -10,6 +10,12 @@ def gamma_simtel_path(): return get_dataset_path("gamma_test_large.simtel.gz") +@pytest.fixture(scope="session") +def proton_simtel_path(): + return get_dataset_path( + "proton_20deg_0deg_run4___cta-prod5-paranal_desert-2147m-Paranal-dark-100evts.simtel.zst" + ) + @pytest.fixture(scope="session") def dl1_tmp_path(tmp_path_factory): """Temporary directory for global dl1 test data""" @@ -38,6 +44,23 @@ def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output +@pytest.fixture(scope="session") +def dl1_proton_file(dl1_tmp_path, proton_simtel_path): + """ + DL1 file containing both images and parameters from a proton simulation set. 
+ """ + from ctapipe.tools.process import ProcessorTool + + output = dl1_tmp_path / "proton.dl1.h5" + argv = [ + f"--input={proton_simtel_path}", + f"--output={output}", + "--write-images", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", + ] + assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 + return output + @pytest.fixture(scope="session") def r1_gamma_file(r1_tmp_path, gamma_simtel_path): """ diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py index 5005cd60..9d936b60 100644 --- a/ctlearn/tools/tests/test_train_model.py +++ b/ctlearn/tools/tests/test_train_model.py @@ -5,11 +5,10 @@ from ctapipe.core import run_tool from ctlearn.tools import TrainCTLearnModel -#@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection", "skydirection"]) -@pytest.mark.parametrize("reco_task", ["energy", "cameradirection", "skydirection"]) -def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): +@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection", "skydirection"]) +def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_path): """ - Test training CTLearn model using a temporary copy of the DL1 gamma file. + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. Each test run gets its own isolated temp directories. """ # Temporary directories for signal and background @@ -21,6 +20,8 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): # Hardcopy DL1 gamma file to the signal directory shutil.copy(dl1_gamma_file, signal_dir) + # Hardcopy DL1 proton file to the background directory + shutil.copy(dl1_proton_file, background_dir) # Output directory for trained model output_dir = tmp_path / f"ctlearn_{reco_task}" @@ -34,15 +35,19 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): "--TrainCTLearnModel.n_epochs=2", "--TrainCTLearnModel.batch_size=4", "--DLImageReader.focal_length_choice=EQUIVALENT", - "--DLImageReader.allowed_tel_types=LST_LST_LSTCam", ] # Include background only for classification task if reco_task == "type": + allowed_tels = {7, 13, 15, 16, 17, 19} argv.extend([ f"--background={background_dir}", - "--pattern-background=*.dl1.h5" + "--pattern-background=*.dl1.h5", + f"--DLImageReader.allowed_tels={allowed_tels}", + "--DLImageReader.enforce_subarray_equality=False", ]) + else: + argv.extend(["--DLImageReader.allowed_tel_types=LST_LST_LSTCam"]) # Run training assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 @@ -57,4 +62,19 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, tmp_path): # Read CSV and verify number of epochs log_df = pd.read_csv(log_file) num_epochs_logged = log_df.shape[0] - assert num_epochs_logged == 2, f"Expected two epochs, found {num_epochs_logged} for {reco_task}" \ No newline at end of file + assert num_epochs_logged == 2, f"Expected two epochs, found {num_epochs_logged} for {reco_task}" + # Check that val_loss column exists + assert "val_loss" in log_df.columns, ( + f"'val_loss' column missing in training_log.csv for {reco_task}" + ) + val_loss_min= 0.0 + val_loss_max= 1.5 if reco_task == "skydirection" else 1.0 + # Check val_loss values are between 0.0 and 1.0 (or 1.5 for skydirection) + val_loss = log_df["val_loss"].dropna() + assert not val_loss.empty, ( + f"'val_loss' column is empty for {reco_task}" + ) + assert ((val_loss >= val_loss_min) & (val_loss <= val_loss_max)).all(), ( + f"'val_loss' values out of range [{val_loss_min}, 
{val_loss_max}] for {reco_task}: " + f"{val_loss.tolist()}" + ) \ No newline at end of file From b2a5f6b56a06b6c8bdc56fbff33a8de69500644e Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 19 Dec 2025 20:17:44 +0100 Subject: [PATCH 16/27] add test for prediction tool --- ctlearn/conftest.py | 62 +++++++++++++++++- ctlearn/tools/tests/test_predict_model.py | 77 +++++++++++++++++++++++ ctlearn/tools/tests/test_train_model.py | 4 -- 3 files changed, 137 insertions(+), 6 deletions(-) create mode 100644 ctlearn/tools/tests/test_predict_model.py diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index 8349f73f..c8d22ef8 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -3,8 +3,11 @@ """ import pytest +import shutil + from ctapipe.core import run_tool from ctapipe.utils import get_dataset_path +from ctlearn.tools import TrainCTLearnModel @pytest.fixture(scope="session") def gamma_simtel_path(): @@ -33,13 +36,14 @@ def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): """ from ctapipe.tools.process import ProcessorTool + allowed_tels = {7, 13, 15, 16, 17, 19} output = dl1_tmp_path / "gamma.dl1.h5" - argv = [ f"--input={gamma_simtel_path}", f"--output={output}", "--write-images", "--SimTelEventSource.focal_length_choice=EQUIVALENT", + f"--SimTelEventSource.allowed_tels={allowed_tels}", ] assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output @@ -51,12 +55,14 @@ def dl1_proton_file(dl1_tmp_path, proton_simtel_path): """ from ctapipe.tools.process import ProcessorTool + allowed_tels = {7, 13, 15, 16, 17, 19} output = dl1_tmp_path / "proton.dl1.h5" argv = [ f"--input={proton_simtel_path}", f"--output={output}", "--write-images", "--SimTelEventSource.focal_length_choice=EQUIVALENT", + f"--SimTelEventSource.allowed_tels={allowed_tels}", ] assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output @@ -77,4 +83,56 @@ def r1_gamma_file(r1_tmp_path, gamma_simtel_path): "--SimTelEventSource.focal_length_choice=EQUIVALENT", ] assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 - return output \ No newline at end of file + return output + +@pytest.fixture(scope="session") +def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory): + """ + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. 
+ """ + tmp_path = tmp_path_factory.mktemp("ctlearn_models") + + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_dl1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_dl1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy DL1 gamma file to the signal directory + shutil.copy(dl1_gamma_file, signal_dir) + # Hardcopy DL1 proton file to the background directory + shutil.copy(dl1_proton_file, background_dir) + + ctlearn_trained_dl1_models = {} + for reco_task in ["type", "energy", "cameradirection"]: + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=1", + "--TrainCTLearnModel.batch_size=2", + "--DLImageReader.focal_length_choice=EQUIVALENT", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend([ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + ]) + + # Run training + assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 + + ctlearn_trained_dl1_models[reco_task] = output_dir / "ctlearn_model.keras" + # Check that the trained model exists + assert ctlearn_trained_dl1_models[reco_task].exists() + return ctlearn_trained_dl1_models \ No newline at end of file diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py new file mode 100644 index 00000000..55e6d97e --- /dev/null +++ b/ctlearn/tools/tests/test_predict_model.py @@ -0,0 +1,77 @@ +import shutil +import numpy as np + +from ctapipe.core import run_tool +from ctapipe.io import TableLoader +from ctlearn.tools import MonoPredictCTLearnModel + +def test_predict_model(ctlearn_trained_dl1_models, dl1_gamma_file, tmp_path): + """ + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. 
+ """ + + model_dir = tmp_path / "trained_models" + model_dir.mkdir(parents=True, exist_ok=True) + + dl2_dir = tmp_path / "dl2_output" + dl2_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy the trained models to the model directory + for reco_task in ["type", "energy", "cameradirection"]: + shutil.copy(ctlearn_trained_dl1_models[f"{reco_task}"], model_dir / f"ctlearn_model_{reco_task}.keras") + model_file = model_dir / f"ctlearn_model_{reco_task}.keras" + assert model_file.exists(), f"Trained model file not found for {reco_task}" + + # Build command-line arguments + output_file = dl2_dir / "gamma.dl2.h5" + argv = [ + f"--input_url={dl1_gamma_file}", + f"--output={output_file}", + "--PredictCTLearnModel.batch_size=4", + "--DLImageReader.focal_length_choice=EQUIVALENT", + ] + + # Run Prediction for energy and type together + assert run_tool( + MonoPredictCTLearnModel(), + argv = argv + [ + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_energy.keras", + "--use-HDF5Merger", + "--no-dl1-images", + "--no-true-images", + ], + cwd=tmp_path + ) == 0 + + assert run_tool( + MonoPredictCTLearnModel(), + argv= argv + [ + f"--PredictCTLearnModel.load_cameradirection_model_from=" + f"{model_dir}/ctlearn_model_cameradirection.keras", + "--no-use-HDF5Merger", + ], + cwd=tmp_path, + ) == 0 + + + allowed_tels = [7, 13, 15, 16, 17, 19] + required_columns = [ + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + "CTLearn_alt", + "CTLearn_az", + "CTLearn_prediction", + "CTLearn_energy", + ] + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: + events = loader.read_telescope_events_by_id(telescopes=allowed_tels) + for tel_id in allowed_tels: + assert len(events[tel_id]) > 0 + for col in required_columns: + assert col in events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" + assert events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py index 9d936b60..ed661409 100644 --- a/ctlearn/tools/tests/test_train_model.py +++ b/ctlearn/tools/tests/test_train_model.py @@ -39,15 +39,11 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat # Include background only for classification task if reco_task == "type": - allowed_tels = {7, 13, 15, 16, 17, 19} argv.extend([ f"--background={background_dir}", "--pattern-background=*.dl1.h5", - f"--DLImageReader.allowed_tels={allowed_tels}", "--DLImageReader.enforce_subarray_equality=False", ]) - else: - argv.extend(["--DLImageReader.allowed_tel_types=LST_LST_LSTCam"]) # Run training assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 From f524fb861ffaf7e410fe56263dd182ecc427f807 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 9 Jan 2026 17:43:00 +0100 Subject: [PATCH 17/27] fixes prediction tools when selecting a subset of tel_ids also added tests for mono predict tool remove flag to use_HDF5_merger component and use it by default overall clean up and code improvement --- ctlearn/conftest.py | 6 +- ctlearn/tools/predict_model.py | 847 +++++++++++++--------- ctlearn/tools/tests/test_predict_model.py | 109 +-- 
 ctlearn/tools/tests/test_train_model.py   |  13 +-
 4 files changed, 574 insertions(+), 401 deletions(-)

diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py
index c8d22ef8..63226540 100644
--- a/ctlearn/conftest.py
+++ b/ctlearn/conftest.py
@@ -36,14 +36,12 @@ def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path):
     """
     from ctapipe.tools.process import ProcessorTool

-    allowed_tels = {7, 13, 15, 16, 17, 19}
     output = dl1_tmp_path / "gamma.dl1.h5"
     argv = [
         f"--input={gamma_simtel_path}",
         f"--output={output}",
         "--write-images",
         "--SimTelEventSource.focal_length_choice=EQUIVALENT",
-        f"--SimTelEventSource.allowed_tels={allowed_tels}",
     ]
     assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0
     return output
@@ -55,14 +53,12 @@ def dl1_proton_file(dl1_tmp_path, proton_simtel_path):
     """
     from ctapipe.tools.process import ProcessorTool

-    allowed_tels = {7, 13, 15, 16, 17, 19}
     output = dl1_tmp_path / "proton.dl1.h5"
     argv = [
         f"--input={proton_simtel_path}",
         f"--output={output}",
         "--write-images",
         "--SimTelEventSource.focal_length_choice=EQUIVALENT",
-        f"--SimTelEventSource.allowed_tels={allowed_tels}",
     ]
     assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0
     return output
@@ -107,6 +107,7 @@ def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory):
         output_dir = tmp_path / f"ctlearn_{reco_task}"

         # Build command-line arguments
+        allowed_tels = [7, 13, 15, 16, 17, 19]
         argv = [
             f"--signal={signal_dir}",
             "--pattern-signal=*.dl1.h5",
             f"--output={output_dir}",
             f"--reco={reco_task}",
             "--TrainCTLearnModel.n_epochs=1",
             "--TrainCTLearnModel.batch_size=2",
             "--DLImageReader.focal_length_choice=EQUIVALENT",
+            f"--DLImageReader.allowed_tels={allowed_tels}",
         ]

         # Include background only for classification task
diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py
index 3d55b150..d0f3649e 100644
--- a/ctlearn/tools/predict_model.py
+++ b/ctlearn/tools/predict_model.py
@@ -6,6 +6,7 @@
 import pathlib
 import numpy as np
 import os
+import tables
 import tensorflow as tf
 import keras

@@ -43,7 +44,50 @@
     classes_with_traits,
 )
 from ctapipe.monitoring.interpolation import PointingInterpolator
+from ctapipe.instrument import SubarrayDescription
 from ctapipe.io import read_table, write_table, HDF5Merger
+from ctapipe.io.hdf5dataformat import (
+    DL0_TEL_POINTING_GROUP,
+    DL1_CAMERA_COEFFICIENTS_GROUP,
+    DL1_COLUMN_NAMES,
+    DL1_IMAGE_STATISTICS_TABLE,
+    DL1_PIXEL_STATISTICS_GROUP,
+    DL1_SUBARRAY_GROUP,
+    DL1_SUBARRAY_POINTING_GROUP,
+    DL1_SUBARRAY_TRIGGER_TABLE,
+    DL1_TEL_GROUP,
+    DL1_TEL_CALIBRATION_GROUP,
+    DL1_TEL_ILLUMINATOR_THROUGHPUT_GROUP,
+    DL1_TEL_IMAGES_GROUP,
+    DL1_TEL_MUON_GROUP,
+    DL1_TEL_MUON_THROUGHPUT_GROUP,
+    DL1_TEL_OPTICAL_PSF_GROUP,
+    DL1_TEL_PARAMETERS_GROUP,
+    DL1_TEL_POINTING_GROUP,
+    DL1_TEL_TRIGGER_TABLE,
+    DL2_EVENT_STATISTICS_GROUP,
+    DL2_SUBARRAY_CROSS_CALIBRATION_GROUP,
+    DL2_SUBARRAY_INTER_CALIBRATION_GROUP,
+    DL2_TEL_GROUP,
+    FIXED_POINTING_GROUP,
+    OBSERVATION_BLOCK_TABLE,
+    R0_TEL_GROUP,
+    R1_TEL_GROUP,
+    SCHEDULING_BLOCK_TABLE,
+    SHOWER_DISTRIBUTION_TABLE,
+    SIMULATION_GROUP,
+    SIMULATION_IMAGES_GROUP,
+    SIMULATION_IMPACT_GROUP,
+    SIMULATION_PARAMETERS_GROUP,
+    SIMULATION_RUN_TABLE,
+    SIMULATION_SHOWER_TABLE,
+    DL2_TEL_PARTICLETYPE_GROUP,
+    DL2_TEL_ENERGY_GROUP,
+    DL2_TEL_GEOMETRY_GROUP,
+    DL2_SUBARRAY_PARTICLETYPE_GROUP,
+    DL2_SUBARRAY_ENERGY_GROUP,
+    DL2_SUBARRAY_GEOMETRY_GROUP,
+)
 from ctapipe.reco.reconstructor import ReconstructionProperty
 from ctapipe.reco.stereo_combination import StereoCombiner
 from ctapipe.reco.utils import add_defaults_and_meta
@@ -54,17 +98,29 @@
 )
 from ctlearn.core.loader import DLDataLoader

-SIMULATION_CONFIG_TABLE = "/configuration/simulation/run"
-FIXED_POINTING_GROUP = "/configuration/telescope/pointing"
-POINTING_GROUP = "/dl1/monitoring/telescope/pointing"
-SUBARRAY_POINTING_GROUP = "/dl1/monitoring/subarray/pointing"
-DL1_TELESCOPE_GROUP = "/dl1/event/telescope"
-DL1_SUBARRAY_GROUP = "/dl1/event/subarray"
-DL2_SUBARRAY_GROUP = "/dl2/event/subarray"
-DL2_TELESCOPE_GROUP = "/dl2/event/telescope"
+# Convenient constants for column names and table keys
+CONFIG_INSTRUMENT_SUBARRAY_LAYOUT = "/configuration/instrument/subarray/layout"
+CONFIG_INSTRUMENT_TEL = "/configuration/instrument/telescope"
+CONFIG_INSTRUMENT_TEL_CAMERA = "/configuration/instrument/telescope/camera"
 SUBARRAY_EVENT_KEYS = ["obs_id", "event_id"]
-TELESCOPE_EVENT_KEYS = ["obs_id", "event_id", "tel_id"]
+TEL_EVENT_KEYS = ["obs_id", "event_id", "tel_id"]
+TEL_ITER_GROUPS = [
+    R0_TEL_GROUP,
+    R1_TEL_GROUP,
+    FIXED_POINTING_GROUP,
+    DL0_TEL_POINTING_GROUP,
+    DL1_TEL_POINTING_GROUP,
+    DL1_TEL_CALIBRATION_GROUP,
+    DL1_TEL_ILLUMINATOR_THROUGHPUT_GROUP,
+    DL1_TEL_MUON_THROUGHPUT_GROUP,
+    DL1_TEL_OPTICAL_PSF_GROUP,
+    DL1_TEL_PARAMETERS_GROUP,
+    DL1_TEL_IMAGES_GROUP,
+    DL1_TEL_MUON_GROUP,
+    SIMULATION_IMAGES_GROUP,
+    SIMULATION_IMPACT_GROUP,
+    SIMULATION_PARAMETERS_GROUP,
+]

 class PredictCTLearnModel(Tool):
     """
     input_url : pathlib.Path
         Input ctapipe HDF5 files including pixel-wise image or waveform data.
-    use_HDF5Merger : bool
-        Set whether to use the HDF5Merger component to copy the selected tables from the input file to the output file.
     dl1_features : bool
         Set whether to include the dl1 feature vectors in the output file.
     dl2_telescope : bool
         Set whether to include dl2 telescope-event-wise data in the output file.
     prefix : str
         Name of the reconstruction algorithm used to generate the dl2 data.
     load_type_model_from : pathlib.Path
         Path to a Keras model file (Keras3) for the classification of the primary particle type.
     load_energy_model_from : pathlib.Path
         Path to a Keras model file (Keras3) for the regression of the primary particle energy.
     load_cameradirection_model_from : pathlib.Path
         Path to a Keras model file (Keras3) for the regression
         of the primary particle arrival direction based on camera coordinate offsets.
     load_skydirection_model_from : pathlib.Path
         Path to a Keras model file (Keras3) for the regression
         of the primary particle arrival direction based on spherical coordinate offsets.
     output_path : pathlib.Path
         Output path to save the dl2 prediction results.
-    overwrite_tables : bool
-        Overwrite the table in the output file if it exists.
     keras_verbose : int
         Verbosity mode of Keras during the prediction.
     strategy : tf.distribute.Strategy
     Methods
     -------
     setup()
         Set up the tool.
     start()
         Start the tool.
     finish()
         Finish the tool.
     _predict_with_model(model_path)
         Load and predict with a CTLearn model.
-    _predict_classification(example_identifiers)
+    _predict_particletype(example_identifiers)
         Predict the classification of the primary particle type.
     _predict_energy(example_identifiers)
         Predict the energy of the primary particle.
     _create_nan_table(nonexample_identifiers, columns, shapes)
         Create a table with NaNs for missing predictions.
     _store_pointing(all_identifiers)
         Store the telescope pointing table from to the output file.
-    _create_feature_vectors_table(example_identifiers, nonexample_identifiers, classification_feature_vectors, energy_feature_vectors, direction_feature_vectors)
+    _create_feature_vectors_table(example_identifiers, nonexample_identifiers, particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors)
         Create the table for the DL1 feature vectors.
     """

     input_url = Path(
         default_value=None,
         allow_none=False,
         directory_ok=False,
         file_ok=True,
     ).tag(config=True)

-    use_HDF5Merger = Bool(
-        default_value=True,
-        allow_none=False,
-        help=(
-            "Set whether to use the HDF5Merger component to copy the selected tables "
-            "from the input file to the output file. CAUTION: This can only be used "
-            "if the output file not exists."
-        ),
-    ).tag(config=True)
-
     dl1_features = Bool(
         default_value=False,
         allow_none=False,
@@ -279,12 +321,6 @@ class PredictCTLearnModel(Tool):
         help="Output path to save the dl2 prediction results",
     ).tag(config=True)

-    overwrite_tables = Bool(
-        default_value=True,
-        allow_none=False,
-        help="Overwrite the table in the output file if it exists",
-    ).tag(config=True)
-
     keras_verbose = Int(
         default_value=1,
         min=0,
     }

     flags = {
+        **flag(
+            "overwrite",
+            "HDF5Merger.overwrite",
+            "Overwrite the output file if it exists",
+            "Do not overwrite the output file if it exists",
+        ),
         **flag(
             "dl1-features",
             "PredictCTLearnModel.dl1_features",
             "Include the dl1 feature vectors in the output file",
             "Exclude the dl1 feature vectors in the output file",
         ),
         **flag(
             "dl2-telescope",
             "PredictCTLearnModel.dl2_telescope",
             "Include dl2 telescope-event-wise data in the output file",
             "Exclude dl2 telescope-event-wise data in the output file",
         ),
-        **flag(
-            "use-HDF5Merger",
-            "PredictCTLearnModel.use_HDF5Merger",
-            "Copy data using the HDF5Merger component (CAUTION: This can not be used if the output file already exists)",
-            "Do not copy data using the HDF5Merger component",
-        ),
         **flag(
             "r0-waveforms",
             "HDF5Merger.r0_waveforms",

     classes = classes_with_traits(DLDataReader)

     def setup(self):
-        # Check if the ctapipe HDF5Merger component is enabled
-        if self.use_HDF5Merger:
-            if os.path.exists(self.output_path):
-                raise ToolConfigurationError(
-                    f"The output file '{self.output_path}' already exists. Please use "
-                    "'--no-use-HDF5Merger' to disable the usage of the HDF5Merger component."
-                )
-            # Copy selected tables from the input file to the output file
-            self.log.info("Copying to output destination.")
-            with HDF5Merger(self.output_path, parent=self) as merger:
-                merger(self.input_url)
-        else:
-            self.log.info(
-                "No copy to output destination, since the usage of the HDF5Merger component is disabled."
-            )
-
+        # Copy selected tables from the input file to the output file
+        self.log.info("Copying to output destination.")
+        with HDF5Merger(self.output_path, parent=self) as merger:
+            merger(self.input_url)
         # Create a MirroredStrategy.
         self.strategy = tf.distribute.MirroredStrategy()
         atexit.register(self.strategy._extended._collective_ops._lock.locked)  # type: ignore
         self.last_batch_size = len(self.indices) % (
             self.batch_size * self.strategy.num_replicas_in_sync
         )
+        # Ensure subarray consistency in the output file
+        self._ensure_subarray_consistency()

     def finish(self):
         self.log.info("Tool is shutting down")

+    def _ensure_subarray_consistency(self):
+        """
+        Align subarray metadata and trigger tables with the selected telescopes.
+
+        When only a subset of telescopes is processed, overwrite the output file's
+        SubarrayDescription and trim the DL1 trigger tables to keep events that
+        involve the selected telescopes. Also rebuild the subarray trigger table
+        with the corresponding telescope participation masks.
+ """ + + input_subarray = SubarrayDescription.from_hdf( + self.input_url, + focal_length_choice=self.dl1dh_reader.focal_length_choice, + ) + if input_subarray.__eq__(self.dl1dh_reader.subarray): + return + + self.dl1dh_reader.subarray.to_hdf(self.output_path, overwrite=True) + self.log.info("SubarrayDescription was stored in '%s'", self.output_path) + + tel_trigger_table = read_table( + self.output_path, + DL1_TEL_TRIGGER_TABLE, + ) + mask = np.isin(tel_trigger_table["tel_id"], self.dl1dh_reader.tel_ids) + tel_trigger_table = tel_trigger_table[mask] + tel_trigger_table.sort(TEL_EVENT_KEYS) + + write_table( + tel_trigger_table, + self.output_path, + DL1_TEL_TRIGGER_TABLE, + overwrite=True, + ) + + subarray_trigger_table = tel_trigger_table.copy() + subarray_trigger_table.keep_columns( + SUBARRAY_EVENT_KEYS + ["time", "event_type"] + ) + subarray_trigger_table = unique( + subarray_trigger_table, keys=SUBARRAY_EVENT_KEYS + ) + + tel_trigger_groups = tel_trigger_table.group_by(SUBARRAY_EVENT_KEYS) + tel_with_trigger = [] + for tel_trigger in tel_trigger_groups.groups: + tel_with_trigger_mask = np.zeros( + len(self.dl1dh_reader.tel_ids), dtype=bool + ) + tel_with_trigger_mask[ + self.dl1dh_reader.subarray.tel_ids_to_indices( + tel_trigger["tel_id"] + ) + ] = True + tel_with_trigger.append(tel_with_trigger_mask) + + subarray_trigger_table.add_column( + tel_with_trigger, index=-2, name="tels_with_trigger" + ) + + write_table( + subarray_trigger_table, + self.output_path, + DL1_SUBARRAY_TRIGGER_TABLE, + overwrite=True, + ) + # Update the simulation shower table to keep only events present in the subarray trigger table + subarray_trigger_table.keep_columns(SUBARRAY_EVENT_KEYS) + sim_shower_table = read_table( + self.output_path, + SIMULATION_SHOWER_TABLE, + ) + sim_shower_table = join( + sim_shower_table, + subarray_trigger_table, + keys=SUBARRAY_EVENT_KEYS, + join_type="right" + ) + sim_shower_table.sort(SUBARRAY_EVENT_KEYS) + write_table( + sim_shower_table, + self.output_path, + SIMULATION_SHOWER_TABLE, + overwrite=True, + ) + # Delete telescope-specific tables for unselected telescopes + self._delete_unselected_telescope_tables() + + def _delete_unselected_telescope_tables(self): + """ + Delete telescope-specific tables for unselected telescopes from the output file. + + Iterates through all telescope-related groups in the HDF5 file and removes + tables corresponding to telescopes that are not in the selected telescope list. + This ensures the output file only contains data for the telescopes that were + processed. The camera configuration tables are also pruned based on the camera indices. 
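+
+        Example:
+        --------
+        Telescope tables follow the ``tel_{tel_id:03d}`` naming convention used
+        elsewhere in this file, so the index is recovered from the node name,
+        e.g. for a hypothetical node ``tel_007``::
+
+            int("tel_007".split("_")[-1])  # -> 7
+
+        and that table is removed unless telescope ID 7 is selected.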
+ """ + # Open the HDF5 file to prune the unselected telescope tables and camera configurations + with tables.open_file(self.output_path, mode="r+") as h5_file: + + def prune_group(group, valid_ids): + for table in group._f_iter_nodes("Table"): + idx = int(table._v_name.split("_")[-1]) + if idx not in valid_ids: + table._f_remove() + + # Telescope-specific tables + tel_ids = set(self.dl1dh_reader.tel_ids) + for group_name in TEL_ITER_GROUPS: + group = getattr(h5_file.root, group_name, None) + if group is not None: + prune_group(group, tel_ids) + + #Camera configuration tables + layout_node = getattr(h5_file.root, CONFIG_INSTRUMENT_SUBARRAY_LAYOUT, None) + camera_group = getattr(h5_file.root, CONFIG_INSTRUMENT_TEL_CAMERA, None) + if not (layout_node and camera_group): + return + # layout can be either a Table or a Group containing a Table + layout_table = ( + layout_node + if isinstance(layout_node, tables.Table) + else next(layout_node._f_iter_nodes("Table")) + ) + camera_indices = set(layout_table.col("camera_index")) + prune_group(camera_group, camera_indices) + + def _create_nan_table(self, nonexample_identifiers, columns, shapes): + """ + Create a table with NaNs for missing predictions. + + This method creates a table with NaNs for missing predictions for the non-example identifiers. + In stereo mode, the table also a column for the valid telescopes is added with all False values. + + Parameters: + ----------- + nonexample_identifiers : astropy.table.Table + Table containing the non-example identifiers. + columns : list of str + List of column names to create in the table. + shapes : list of shapes + List of shapes for the columns to create in the table. + + Returns: + -------- + nan_table : astropy.table.Table + Table containing NaNs for missing predictions. + """ + # Create a table with NaNs for missing predictions + nan_table = nonexample_identifiers.copy() + for column_name, shape in zip(columns, shapes): + nan_table.add_column(np.full(shape, np.nan), name=column_name) + # Add that no telescope is valid for the non-example identifiers in stereo mode + if self.dl1dh_reader.mode == "stereo": + nan_table.add_column( + np.zeros( + (len(nonexample_identifiers), len(self.dl1dh_reader.tel_ids)), + dtype=bool, + ), + name=f"{self.prefix}_telescopes", + ) + return nan_table + + def deduplicate_first_valid( + self, + table: Table, + keys=('obs_id', 'event_id'), + valid_col='CTLearn_is_valid', + ): + """ + Return a deduplicated Astropy Table. + + For each group defined by `keys`, keep the first row where + `valid_col` is True. If none are valid, keep the first row. + """ + + t = table.copy() + + t.sort( + list(keys) + [valid_col], + reverse=[False] * len(keys) + [True] + ) + + return unique(t, keys=list(keys), keep='first') + def _predict_with_model(self, model_path): """ Load and predict with a CTLearn model. @@ -482,9 +693,18 @@ def _predict_with_model(self, model_path): x = layer(x) head = keras.Model(inputs=backbone_output_shape, outputs=x) # Apply the backbone model with the data loader to retrieve the feature vectors - feature_vectors = backbone_model.predict( - data_loader, verbose=self.keras_verbose - ) + try: + feature_vectors = backbone_model.predict( + data_loader, verbose=self.keras_verbose + ) + except ValueError as err: + if str(err).startswith("Input 0 of layer"): + raise ToolConfigurationError( + "Model input shape does not match the prediction data. " + "This is usually caused by selecting the wrong telescope_id. 
" + "Please ensure the telescope configuration matches the one used for training." + ) from err + raise # Apply the head model with the feature vectors to retrieve the prediction predict_data = Table( { @@ -516,7 +736,16 @@ def _predict_with_model(self, model_path): ) else: # Predict the data using the loaded model - predict_data = model.predict(data_loader, verbose=self.keras_verbose) + try: + predict_data = model.predict(data_loader, verbose=self.keras_verbose) + except ValueError as err: + if str(err).startswith("Input 0 of layer"): + raise ToolConfigurationError( + "Model input shape does not match the prediction data. " + "This is usually caused by selecting the wrong telescope_id. " + "Please ensure the telescope configuration matches the one used for training." + ) from err + raise # Create a astropy table with the prediction results # The classification task has a softmax layer as the last layer # which returns the probabilities for each class in an array, while @@ -540,7 +769,7 @@ def _predict_with_model(self, model_path): predict_data = vstack([predict_data, predict_data_last_batch]) return predict_data, feature_vectors - def _predict_classification(self, example_identifiers): + def _predict_particletype(self, example_identifiers): """ Predict the classification of the primary particle type. @@ -550,7 +779,7 @@ def _predict_classification(self, example_identifiers): Parameters: ----------- - classification_table : astropy.table.Table + particletype_table : astropy.table.Table Table containing the example identifiers with an additional column for the predicted classification score ('gammaness'). feature_vectors : np.ndarray @@ -564,11 +793,11 @@ def _predict_classification(self, example_identifiers): self.load_type_model_from ) # Create prediction table and add the predicted classification score ('gammaness') - classification_table = example_identifiers.copy() - classification_table.add_column( + particletype_table = example_identifiers.copy() + particletype_table.add_column( predict_data["type"].T[1], name=f"{self.prefix}_tel_prediction" ) - return classification_table, feature_vectors + return particletype_table, feature_vectors def _predict_energy(self, example_identifiers): """ @@ -813,64 +1042,6 @@ def _transform_spher_coord_offsets_to_sky(self, table) -> Table: ) return table - def _create_nan_table(self, nonexample_identifiers, columns, shapes): - """ - Create a table with NaNs for missing predictions. - - This method creates a table with NaNs for missing predictions for the non-example identifiers. - In stereo mode, the table also a column for the valid telescopes is added with all False values. - - Parameters: - ----------- - nonexample_identifiers : astropy.table.Table - Table containing the non-example identifiers. - columns : list of str - List of column names to create in the table. - shapes : list of shapes - List of shapes for the columns to create in the table. - - Returns: - -------- - nan_table : astropy.table.Table - Table containing NaNs for missing predictions. 
- """ - # Create a table with NaNs for missing predictions - nan_table = nonexample_identifiers.copy() - for column_name, shape in zip(columns, shapes): - nan_table.add_column(np.full(shape, np.nan), name=column_name) - # Add that no telescope is valid for the non-example identifiers in stereo mode - if self.dl1dh_reader.mode == "stereo": - nan_table.add_column( - np.zeros( - (len(nonexample_identifiers), len(self.dl1dh_reader.tel_ids)), - dtype=bool, - ), - name=f"{self.prefix}_telescopes", - ) - return nan_table - - def deduplicate_first_valid( - self, - table: Table, - keys=('obs_id', 'event_id'), - valid_col='CTLearn_is_valid', - ): - """ - Return a deduplicated Astropy Table. - - For each group defined by `keys`, keep the first row where - `valid_col` is True. If none are valid, keep the first row. - """ - - t = table.copy() - - t.sort( - list(keys) + [valid_col], - reverse=[False] * len(keys) + [True] - ) - - return unique(t, keys=list(keys), keep='first') - def _store_pointing(self, all_identifiers): """ Store the telescope pointing table from to the output file. @@ -912,13 +1083,12 @@ def _store_pointing(self, all_identifiers): write_table( tel_pointing_table, self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) self.log.info( "DL1 telescope pointing table was stored in '%s' under '%s'", self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) pointing_info = vstack(pointing_info) if self.dl1dh_reader.mode == "stereo": @@ -947,13 +1117,12 @@ def _store_pointing(self, all_identifiers): write_table( pointing_table, self.output_path, - f"{SUBARRAY_POINTING_GROUP}", - overwrite=self.overwrite_tables, + DL1_SUBARRAY_POINTING_GROUP, ) self.log.info( "DL1 subarray pointing table was stored in '%s' under '%s'", self.output_path, - f"{SUBARRAY_POINTING_GROUP}", + DL1_SUBARRAY_POINTING_GROUP, ) return pointing_info @@ -961,7 +1130,7 @@ def _create_feature_vectors_table( self, example_identifiers, nonexample_identifiers=None, - classification_feature_vectors=None, + particletype_feature_vectors=None, energy_feature_vectors=None, direction_feature_vectors=None, ): @@ -978,8 +1147,8 @@ def _create_feature_vectors_table( Table containing the example identifiers. nonexample_identifiers : astropy.table.Table or None Table containing the non-example identifiers to fill the NaNs. - classification_feature_vectors : np.ndarray or None - Array containing the classification feature vectors. + particletype_feature_vectors : np.ndarray or None + Array containing the particletype feature vectors. energy_feature_vectors : np.ndarray or None Array containing the energy feature vectors. 
direction_feature_vectors : np.ndarray or None @@ -993,20 +1162,20 @@ def _create_feature_vectors_table( # Create the feature vector table feature_vector_table = example_identifiers.copy() columns_list, shapes_list = [], [] - if classification_feature_vectors is not None: + if particletype_feature_vectors is not None: is_valid_col = ~np.isnan( - np.min(classification_feature_vectors, axis=1), dtype=bool + np.min(particletype_feature_vectors, axis=1), dtype=bool ) feature_vector_table.add_column( - classification_feature_vectors, - name=f"{self.prefix}_tel_classification_feature_vectors", + particletype_feature_vectors, + name=f"{self.prefix}_tel_particletype_feature_vectors", ) if nonexample_identifiers is not None: - columns_list.append(f"{self.prefix}_tel_classification_feature_vectors") + columns_list.append(f"{self.prefix}_tel_particletype_feature_vectors") shapes_list.append( ( len(nonexample_identifiers), - classification_feature_vectors.shape[1], + particletype_feature_vectors.shape[1], ) ) if energy_feature_vectors is not None: @@ -1102,11 +1271,9 @@ class MonoPredictCTLearnModel(PredictCTLearnModel): --energy_model="/path/to/your/mono/energy/ctlearn_model.cpk" \\ --cameradirection_model="/path/to/your/mono/cameradirection/ctlearn_model.cpk" \\ --dl1-features \\ - --use-HDF5Merger \\ --no-dl1-images \\ --no-true-images \\ --output output.dl2.h5 \\ - --PredictCTLearnModel.overwrite_tables=True \\ To predict from pixel-wise waveform data in mono mode using trained CTLearn models: > ctlearn-predict-mono-model \\ @@ -1117,13 +1284,11 @@ class MonoPredictCTLearnModel(PredictCTLearnModel): --type_model="/path/to/your/mono_waveform/type/ctlearn_model.cpk" \\ --energy_model="/path/to/your/mono_waveform/energy/ctlearn_model.cpk" \\ --cameradirection_model="/path/to/your/mono_waveform/cameradirection/ctlearn_model.cpk" \\ - --use-HDF5Merger \\ --no-r0-waveforms \\ --no-r1-waveforms \\ --no-dl1-images \\ --no-true-images \\ --output output.dl2.h5 \\ - --PredictCTLearnModel.overwrite_tables=True \\ """ stereo_combiner_cls = ComponentName( @@ -1136,11 +1301,14 @@ def start(self): self.log.info("Processing the telescope pointings...") # Retrieve the IDs from the dl1dh for the prediction tables example_identifiers = self.dl1dh_reader.example_identifiers.copy() - example_identifiers.keep_columns(TELESCOPE_EVENT_KEYS) - all_identifiers = self.dl1dh_reader.tel_trigger_table.copy() - all_identifiers.keep_columns(TELESCOPE_EVENT_KEYS + ["time"]) + example_identifiers.keep_columns(TEL_EVENT_KEYS) + all_identifiers = read_table( + self.output_path, + DL1_TEL_TRIGGER_TABLE, + ) + all_identifiers.keep_columns(TEL_EVENT_KEYS + ["time"]) nonexample_identifiers = setdiff( - all_identifiers, example_identifiers, keys=TELESCOPE_EVENT_KEYS + all_identifiers, example_identifiers, keys=TEL_EVENT_KEYS ) nonexample_identifiers.remove_column("time") # Pointing table for the mono mode for MC simulation @@ -1152,111 +1320,111 @@ def start(self): pointing_info = super()._store_pointing(all_identifiers) self.log.info("Starting the prediction...") - classification_feature_vectors = None + particletype_feature_vectors = None if self.load_type_model_from is not None: - self.type_stereo_combiner = StereoCombiner.from_name( - self.stereo_combiner_cls, - prefix=self.prefix, - property=ReconstructionProperty.PARTICLE_TYPE, - parent=self, + # Predict the type of the primary particle + particletype_table, particletype_feature_vectors = ( + super()._predict_particletype(example_identifiers) ) - # Predict the energy of the primary 
particle - classification_table, classification_feature_vectors = ( - super()._predict_classification(example_identifiers) + # Produce output table with NaNs for missing predictions + if len(nonexample_identifiers) > 0: + nan_table = super()._create_nan_table( + nonexample_identifiers, + columns=[f"{self.prefix}_tel_prediction"], + shapes=[(len(nonexample_identifiers),)], + ) + particletype_table = vstack([particletype_table, nan_table]) + # Add is_valid column to the particle type table + particletype_table.add_column( + ~np.isnan( + particletype_table[f"{self.prefix}_tel_prediction"].data, + dtype=bool, + ), + name=f"{self.prefix}_tel_is_valid", + ) + # Add the default values and meta data to the table + add_defaults_and_meta( + particletype_table, + ParticleClassificationContainer, + prefix=self.prefix, + add_tel_prefix=True, ) if self.dl2_telescope: - # Produce output table with NaNs for missing predictions - if len(nonexample_identifiers) > 0: - nan_table = super()._create_nan_table( - nonexample_identifiers, - columns=[f"{self.prefix}_tel_prediction"], - shapes=[(len(nonexample_identifiers),)], - ) - classification_table = vstack([classification_table, nan_table]) - # Add is_valid column to the energy table - classification_table.add_column( - ~np.isnan( - classification_table[f"{self.prefix}_tel_prediction"].data, - dtype=bool, - ), - name=f"{self.prefix}_tel_is_valid", - ) - # Add the default values and meta data to the table - add_defaults_and_meta( - classification_table, - ParticleClassificationContainer, - prefix=self.prefix, - add_tel_prefix=True, - ) for tel_id in self.dl1dh_reader.selected_telescopes[ self.dl1dh_reader.tel_type ]: # Retrieve the example identifiers for the selected telescope - telescope_mask = classification_table["tel_id"] == tel_id - classification_tel_table = classification_table[telescope_mask] - classification_tel_table.sort(TELESCOPE_EVENT_KEYS) + telescope_mask = particletype_table["tel_id"] == tel_id + particletype_tel_table = particletype_table[telescope_mask] + particletype_tel_table.sort(TEL_EVENT_KEYS) # Save the prediction to the output file for the selected telescope write_table( - classification_tel_table, + particletype_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) + if self.dl2_subarray: self.log.info("Processing and storing the subarray type prediction...") - # If only one telescope is used, copy the classification table + # If only one telescope is used, copy the particletype table # and modify it to subarray format if len(self.dl1dh_reader.tel_ids) == 1: - classification_subarray_table = classification_table.copy() + particletype_subarray_table = particletype_table.copy() telescope_mask = ( - classification_subarray_table["tel_id"] + particletype_subarray_table["tel_id"] == self.dl1dh_reader.tel_ids[0] ) - classification_subarray_table = classification_subarray_table[ + particletype_subarray_table = particletype_subarray_table[ telescope_mask ] - classification_subarray_table.remove_column("tel_id") - for colname in classification_subarray_table.colnames: + particletype_subarray_table.remove_column("tel_id") + for colname in 
particletype_subarray_table.colnames: if "_tel_" in colname: - classification_subarray_table.rename_column( + particletype_subarray_table.rename_column( colname, colname.replace("_tel", "") ) - classification_subarray_table.add_column( + particletype_subarray_table.add_column( [ [val] - for val in classification_subarray_table[ + for val in particletype_subarray_table[ f"{self.prefix}_is_valid" ] ], name=f"{self.prefix}_telescopes", ) else: + self.type_stereo_combiner = StereoCombiner.from_name( + self.stereo_combiner_cls, + prefix=self.prefix, + property=ReconstructionProperty.PARTICLE_TYPE, + parent=self, + ) # Combine the telescope predictions to the subarray prediction using the stereo combiner - classification_subarray_table = ( - self.type_stereo_combiner.predict_table(classification_table) + particletype_subarray_table = ( + self.type_stereo_combiner.predict_table(particletype_table) ) # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - classification_subarray_table[f"{self.prefix}_telescopes"].dtype + particletype_subarray_table[f"{self.prefix}_telescopes"].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination reco_telescopes = np.zeros( ( - len(classification_subarray_table), + len(particletype_subarray_table), len(self.dl1dh_reader.tel_ids), ), dtype=bool, ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - classification_subarray_table[f"{self.prefix}_telescopes"] + particletype_subarray_table[f"{self.prefix}_telescopes"] ): if not tel_id_mask: continue @@ -1267,88 +1435,80 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix length - classification_subarray_table[f"{self.prefix}_telescopes"] = ( + particletype_subarray_table[f"{self.prefix}_telescopes"] = ( reco_telescopes ) - # Deduplicate the subarray classification table to have only one entry per event - classification_subarray_table = super().deduplicate_first_valid( - table=classification_subarray_table, + # Deduplicate the subarray particletype table to have only one entry per event + particletype_subarray_table = super().deduplicate_first_valid( + table=particletype_subarray_table, keys=SUBARRAY_EVENT_KEYS, valid_col=f"{self.prefix}_is_valid", ) - # Sort the subarray classification table - classification_subarray_table.sort(SUBARRAY_EVENT_KEYS) + # Sort the subarray particletype table + particletype_subarray_table.sort(SUBARRAY_EVENT_KEYS) # Save the prediction to the output file write_table( - classification_subarray_table, + particletype_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", ) energy_feature_vectors = None if self.load_energy_model_from is not None: - self.energy_stereo_combiner = StereoCombiner.from_name( - self.stereo_combiner_cls, - prefix=self.prefix, - property=ReconstructionProperty.ENERGY, - parent=self, - ) # Predict the energy of the primary particle energy_table, energy_feature_vectors = super()._predict_energy( example_identifiers ) - if self.dl2_telescope: - # Produce output table with NaNs for missing predictions - if 
len(nonexample_identifiers) > 0: - nan_table = super()._create_nan_table( - nonexample_identifiers, - columns=[f"{self.prefix}_tel_energy"], - shapes=[(len(nonexample_identifiers),)], - ) - energy_table = vstack([energy_table, nan_table]) - # Add is_valid column to the energy table - energy_table.add_column( - ~np.isnan( - energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool - ), - name=f"{self.prefix}_tel_is_valid", - ) - # Add the default values and meta data to the table - add_defaults_and_meta( - energy_table, - ReconstructedEnergyContainer, - prefix=self.prefix, - add_tel_prefix=True, + # Produce output table with NaNs for missing predictions + if len(nonexample_identifiers) > 0: + nan_table = super()._create_nan_table( + nonexample_identifiers, + columns=[f"{self.prefix}_tel_energy"], + shapes=[(len(nonexample_identifiers),)], ) + energy_table = vstack([energy_table, nan_table]) + # Add is_valid column to the energy table + energy_table.add_column( + ~np.isnan( + energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool + ), + name=f"{self.prefix}_tel_is_valid", + ) + # Add the default values and meta data to the table + add_defaults_and_meta( + energy_table, + ReconstructedEnergyContainer, + prefix=self.prefix, + add_tel_prefix=True, + ) + if self.dl2_telescope: for tel_id in self.dl1dh_reader.selected_telescopes[ self.dl1dh_reader.tel_type ]: # Retrieve the example identifiers for the selected telescope telescope_mask = energy_table["tel_id"] == tel_id energy_tel_table = energy_table[telescope_mask] - energy_tel_table.sort(TELESCOPE_EVENT_KEYS) + energy_tel_table.sort(TEL_EVENT_KEYS) # Save the prediction to the output file write_table( energy_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL2_TEL_ENERGY_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_ENERGY_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) if self.dl2_subarray: self.log.info( "Processing and storing the subarray energy prediction..." 
) - # If only one telescope is used, copy the classification table + # If only one telescope is used, copy the particletype table # and modify it to subarray format if len(self.dl1dh_reader.tel_ids) == 1: energy_subarray_table = energy_table.copy() @@ -1370,6 +1530,12 @@ def start(self): name=f"{self.prefix}_telescopes", ) else: + self.energy_stereo_combiner = StereoCombiner.from_name( + self.stereo_combiner_cls, + prefix=self.prefix, + property=ReconstructionProperty.ENERGY, + parent=self, + ) # Combine the telescope predictions to the subarray prediction using the stereo combiner energy_subarray_table = self.energy_stereo_combiner.predict_table( energy_table @@ -1404,7 +1570,7 @@ def start(self): energy_subarray_table[f"{self.prefix}_telescopes"] = ( reco_telescopes ) - # Deduplicate the subarray classification table to have only one entry per event + # Deduplicate the subarray energy table to have only one entry per event energy_subarray_table = super().deduplicate_first_valid( table=energy_subarray_table, keys=SUBARRAY_EVENT_KEYS, @@ -1416,86 +1582,78 @@ def start(self): write_table( energy_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", ) direction_feature_vectors = None if self.load_cameradirection_model_from is not None: - self.geometry_stereo_combiner = StereoCombiner.from_name( - self.stereo_combiner_cls, - prefix=self.prefix, - property=ReconstructionProperty.GEOMETRY, - parent=self, - ) # Join the prediction table with the telescope pointing table example_identifiers = join( left=example_identifiers, right=pointing_info, - keys=TELESCOPE_EVENT_KEYS, + keys=TEL_EVENT_KEYS, ) # Predict the arrival direction of the primary particle direction_table, direction_feature_vectors = ( super()._predict_cameradirection(example_identifiers) ) direction_tel_tables = [] - if self.dl2_telescope: - for tel_id in self.dl1dh_reader.selected_telescopes[ - self.dl1dh_reader.tel_type - ]: - # Retrieve the example identifiers for the selected telescope - telescope_mask = direction_table["tel_id"] == tel_id - direction_tel_table = direction_table[telescope_mask] - direction_tel_table = super()._transform_cam_coord_offsets_to_sky( - direction_tel_table - ) - # Produce output table with NaNs for missing predictions - nan_telescope_mask = nonexample_identifiers["tel_id"] == tel_id - nonexample_identifiers_tel = nonexample_identifiers[ - nan_telescope_mask - ] - if len(nonexample_identifiers_tel) > 0: - nan_table = super()._create_nan_table( - nonexample_identifiers_tel, - columns=[f"{self.prefix}_tel_alt", f"{self.prefix}_tel_az"], - shapes=[ - (len(nonexample_identifiers_tel),), - (len(nonexample_identifiers_tel),), - ], - ) - direction_tel_table = vstack([direction_tel_table, nan_table]) - direction_tel_table.sort(TELESCOPE_EVENT_KEYS) - # Add is_valid column to the direction table - direction_tel_table.add_column( - ~np.isnan( - direction_tel_table[f"{self.prefix}_tel_alt"].data, - dtype=bool, - ), - name=f"{self.prefix}_tel_is_valid", - ) - # Add the default values and meta data to the table - add_defaults_and_meta( - direction_tel_table, - ReconstructedGeometryContainer, - prefix=self.prefix, - add_tel_prefix=True, + for tel_id in self.dl1dh_reader.selected_telescopes[ + self.dl1dh_reader.tel_type + ]: + 
# Retrieve the example identifiers for the selected telescope + telescope_mask = direction_table["tel_id"] == tel_id + direction_tel_table = direction_table[telescope_mask] + direction_tel_table = super()._transform_cam_coord_offsets_to_sky( + direction_tel_table + ) + # Produce output table with NaNs for missing predictions + nan_telescope_mask = nonexample_identifiers["tel_id"] == tel_id + nonexample_identifiers_tel = nonexample_identifiers[ + nan_telescope_mask + ] + if len(nonexample_identifiers_tel) > 0: + nan_table = super()._create_nan_table( + nonexample_identifiers_tel, + columns=[f"{self.prefix}_tel_alt", f"{self.prefix}_tel_az"], + shapes=[ + (len(nonexample_identifiers_tel),), + (len(nonexample_identifiers_tel),), + ], ) - direction_tel_tables.append(direction_tel_table) + direction_tel_table = vstack([direction_tel_table, nan_table]) + direction_tel_table.sort(TEL_EVENT_KEYS) + # Add is_valid column to the direction table + direction_tel_table.add_column( + ~np.isnan( + direction_tel_table[f"{self.prefix}_tel_alt"].data, + dtype=bool, + ), + name=f"{self.prefix}_tel_is_valid", + ) + # Add the default values and meta data to the table + add_defaults_and_meta( + direction_tel_table, + ReconstructedGeometryContainer, + prefix=self.prefix, + add_tel_prefix=True, + ) + direction_tel_tables.append(direction_tel_table) + if self.dl2_telescope: # Save the prediction to the output file write_table( direction_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefix}/tel_{tel_id:03d}", ) if self.dl2_subarray: self.log.info( @@ -1504,7 +1662,7 @@ def start(self): # Stack the telescope tables to the subarray table direction_tel_tables = vstack(direction_tel_tables) # Sort the table by the telescope event keys - direction_tel_tables.sort(TELESCOPE_EVENT_KEYS) + direction_tel_tables.sort(TEL_EVENT_KEYS) # If only one telescope is used, copy the classification table # and modify it to subarray format if len(self.dl1dh_reader.tel_ids) == 1: @@ -1530,6 +1688,12 @@ def start(self): name=f"{self.prefix}_telescopes", ) else: + self.geometry_stereo_combiner = StereoCombiner.from_name( + self.stereo_combiner_cls, + prefix=self.prefix, + property=ReconstructionProperty.GEOMETRY, + parent=self, + ) # Combine the telescope predictions to the subarray prediction using the stereo combiner direction_subarray_table = ( self.geometry_stereo_combiner.predict_table( @@ -1566,7 +1730,7 @@ def start(self): direction_subarray_table[f"{self.prefix}_telescopes"] = ( reco_telescopes ) - # Deduplicate the subarray classification table to have only one entry per event + # Deduplicate the subarray direction table to have only one entry per event direction_subarray_table = super().deduplicate_first_valid( table=direction_subarray_table, keys=SUBARRAY_EVENT_KEYS, @@ -1578,13 +1742,12 @@ def start(self): write_table( direction_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", ) # Create the 
feature vector table if the DL1 features are enabled if self.dl1_features: @@ -1592,31 +1755,30 @@ def start(self): feature_vector_table = super()._create_feature_vectors_table( example_identifiers, nonexample_identifiers, - classification_feature_vectors, + particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors, ) # Loop over the selected telescopes and store the feature vectors # for each telescope in the output file. The feature vectors are stored - # in the DL1_TELESCOPE_GROUP/features/{prefix}/tel_{tel_id:03d} table. + # in the DL1_TEL_GROUP/features/{prefix}/tel_{tel_id:03d} table. for tel_id in self.dl1dh_reader.selected_telescopes[ self.dl1dh_reader.tel_type ]: # Retrieve the example identifiers for the selected telescope telescope_mask = feature_vector_table["tel_id"] == tel_id feature_vectors_tel_table = feature_vector_table[telescope_mask] - feature_vectors_tel_table.sort(TELESCOPE_EVENT_KEYS) + feature_vectors_tel_table.sort(TEL_EVENT_KEYS) # Save the prediction to the output file write_table( feature_vectors_tel_table, self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL1_TEL_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", ) self.log.info( "DL1 feature vectors was stored in '%s' under '%s'", self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", + f"{DL1_TEL_GROUP}/features/{self.prefix}/tel_{tel_id:03d}", ) def _store_mc_telescope_pointing(self, all_identifiers): @@ -1644,7 +1806,7 @@ def _store_mc_telescope_pointing(self, all_identifiers): keys=["obs_id", "tel_id"], ) # TODO: use keep_order for astropy v7.0.0 - tel_pointing.sort(TELESCOPE_EVENT_KEYS) + tel_pointing.sort(TEL_EVENT_KEYS) # Retrieve the example identifiers for the selected telescope tel_pointing_table = Table( { @@ -1656,13 +1818,12 @@ def _store_mc_telescope_pointing(self, all_identifiers): write_table( tel_pointing_table, self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", - overwrite=self.overwrite_tables, + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) self.log.info( "DL1 telescope pointing table was stored in '%s' under '%s'", self.output_path, - f"{POINTING_GROUP}/tel_{tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{tel_id:03d}", ) pointing_info.append(tel_pointing) pointing_info = vstack(pointing_info) @@ -1713,7 +1874,6 @@ class StereoPredictCTLearnModel(PredictCTLearnModel): --energy_model="/path/to/your/stereo/energy/ctlearn_model.cpk" \\ --skydirection_model="/path/to/your/stereo/skydirection/ctlearn_model.cpk" \\ --output output.dl2.h5 \\ - --PredictCTLearnModel.overwrite_tables=True \\ """ def start(self): @@ -1721,7 +1881,10 @@ def start(self): # Retrieve the IDs from the dl1dh for the prediction tables example_identifiers = self.dl1dh_reader.unique_example_identifiers.copy() example_identifiers.keep_columns(SUBARRAY_EVENT_KEYS) - all_identifiers = self.dl1dh_reader.subarray_trigger_table.copy() + all_identifiers = read_table( + self.output_path, + DL1_TEL_TRIGGER_TABLE, + ) all_identifiers.keep_columns(SUBARRAY_EVENT_KEYS + ["time"]) nonexample_identifiers = setdiff( all_identifiers, example_identifiers, keys=SUBARRAY_EVENT_KEYS @@ -1750,11 +1913,11 @@ def start(self): pointing_info = super()._store_pointing(all_identifiers) self.log.info("Starting the prediction...") - classification_feature_vectors = None + particletype_feature_vectors = None if self.load_type_model_from is not None: # Predict the classification of the primary particle - 
classification_table, classification_feature_vectors = ( - super()._predict_classification(example_identifiers) + particletype_table, particletype_feature_vectors = ( + super()._predict_particletype(example_identifiers) ) if self.dl2_subarray: # Produce output table with NaNs for missing predictions @@ -1764,46 +1927,46 @@ def start(self): columns=[f"{self.prefix}_tel_prediction"], shapes=[(len(nonexample_identifiers),)], ) - classification_table = vstack([classification_table, nan_table]) - # Add is_valid column to the classification table - classification_table.add_column( + particletype_table = vstack([particletype_table, nan_table]) + # Add is_valid column to the particletype table + particletype_table.add_column( ~np.isnan( - classification_table[f"{self.prefix}_tel_prediction"].data, + particletype_table[f"{self.prefix}_tel_prediction"].data, dtype=bool, ), name=f"{self.prefix}_tel_is_valid", ) # Rename the columns for the stereo mode - classification_table.rename_column( + particletype_table.rename_column( f"{self.prefix}_tel_prediction", f"{self.prefix}_prediction" ) - classification_table.rename_column( + particletype_table.rename_column( f"{self.prefix}_tel_is_valid", f"{self.prefix}_is_valid" ) - # Deduplicate the subarray classification table to have only one entry per event - classification_table = super().deduplicate_first_valid( - table=classification_table, + # Deduplicate the subarray particletype table to have only one entry per event + particletype_table = super().deduplicate_first_valid( + table=particletype_table, keys=SUBARRAY_EVENT_KEYS, valid_col=f"{self.prefix}_is_valid", ) - classification_table.sort(SUBARRAY_EVENT_KEYS) + particletype_table.sort(SUBARRAY_EVENT_KEYS) # Add the default values and meta data to the table add_defaults_and_meta( - classification_table, + particletype_table, ParticleClassificationContainer, prefix=self.prefix, ) # Save the prediction to the output file write_table( - classification_table, + particletype_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", overwrite=self.overwrite_tables, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", ) energy_feature_vectors = None if self.load_energy_model_from is not None: @@ -1851,13 +2014,12 @@ def start(self): write_table( energy_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", ) direction_feature_vectors = None if self.load_skydirection_model_from is not None: @@ -1909,13 +2071,12 @@ def start(self): write_table( direction_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", - overwrite=self.overwrite_tables, + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", ) # Create the feature vector table if the DL1 features are enabled @@ -1924,17 +2085,17 @@ def start(self): feature_vector_table = super()._create_feature_vectors_table( example_identifiers, nonexample_identifiers, - 
classification_feature_vectors, + particletype_feature_vectors, energy_feature_vectors, direction_feature_vectors, ) # Loop over the selected telescopes and store the feature vectors # for each telescope in the output file. The feature vectors are stored - # in the DL1_TELESCOPE_GROUP/features/{prefix}/tel_{tel_id:03d} table. + # in the DL1_TEL_GROUP/features/{prefix}/tel_{tel_id:03d} table. # Rename the columns for the stereo mode feature_vector_table.rename_column( - f"{self.prefix}_tel_classification_feature_vectors", - f"{self.prefix}_classification_feature_vectors", + f"{self.prefix}_tel_particletype_feature_vectors", + f"{self.prefix}_particletype_feature_vectors", ) feature_vector_table.rename_column( f"{self.prefix}_tel_energy_feature_vectors", @@ -1953,7 +2114,6 @@ def start(self): feature_vector_table, self.output_path, f"{DL1_SUBARRAY_GROUP}/features/{self.prefix}", - overwrite=self.overwrite_tables, ) self.log.info( "DL1 feature vectors was stored in '%s' under '%s'", @@ -1973,7 +2133,7 @@ def _store_mc_subarray_pointing(self, all_identifiers): # Read the subarray pointing table pointing_info = read_table( self.input_url, - f"{SIMULATION_CONFIG_TABLE}", + SIMULATION_RUN_TABLE, ) # Assuming min_az = max_az and min_alt = max_alt pointing_info.keep_columns(["obs_id", "min_az", "min_alt"]) @@ -2001,13 +2161,12 @@ def _store_mc_subarray_pointing(self, all_identifiers): write_table( pointing_table, self.output_path, - f"{SUBARRAY_POINTING_GROUP}", - overwrite=self.overwrite_tables, + DL1_SUBARRAY_POINTING_GROUP, ) self.log.info( "DL1 subarray pointing table was stored in '%s' under '%s'", self.output_path, - f"{SUBARRAY_POINTING_GROUP}", + DL1_SUBARRAY_POINTING_GROUP, ) return pointing_info diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py index 55e6d97e..23f32352 100644 --- a/ctlearn/tools/tests/test_predict_model.py +++ b/ctlearn/tools/tests/test_predict_model.py @@ -1,11 +1,31 @@ import shutil import numpy as np +import pytest from ctapipe.core import run_tool from ctapipe.io import TableLoader from ctlearn.tools import MonoPredictCTLearnModel -def test_predict_model(ctlearn_trained_dl1_models, dl1_gamma_file, tmp_path): +# Columns that should be present in the output DL2 file +REQUIRED_COLUMNS = [ + "event_id", + "obs_id", + "CTLearn_alt", + "true_alt", + "CTLearn_az", + "true_az", + "CTLearn_prediction", + "true_shower_primary_id", + "CTLearn_energy", + "true_energy", + "CTLearn_is_valid", + "CTLearn_telescopes", + "tels_with_trigger" +] + + +@pytest.mark.parametrize("dl2_tel_flag", ["dl2-telescope", "no-dl2-telescope"]) +def test_predict_model(tmp_path, ctlearn_trained_dl1_models, dl1_gamma_file, dl2_tel_flag): """ Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. Each test run gets its own isolated temp directories. 
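+    The prediction is exercised with and without telescope-wise DL2 output
+    (``dl2-telescope`` vs. ``no-dl2-telescope``) and for single- and
+    multi-telescope selections; the resulting DL2 file is read back with the
+    ctapipe TableLoader to verify the expected columns.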
@@ -24,54 +44,51 @@ def test_predict_model(ctlearn_trained_dl1_models, dl1_gamma_file, tmp_path): assert model_file.exists(), f"Trained model file not found for {reco_task}" # Build command-line arguments - output_file = dl2_dir / "gamma.dl2.h5" argv = [ f"--input_url={dl1_gamma_file}", - f"--output={output_file}", "--PredictCTLearnModel.batch_size=4", "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_model_cameradirection.keras", + "--no-dl1-images", + "--no-true-images", + f"--{dl2_tel_flag}", ] - - # Run Prediction for energy and type together - assert run_tool( - MonoPredictCTLearnModel(), - argv = argv + [ - f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_type.keras", - f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_energy.keras", - "--use-HDF5Merger", - "--no-dl1-images", - "--no-true-images", - ], - cwd=tmp_path - ) == 0 - - assert run_tool( - MonoPredictCTLearnModel(), - argv= argv + [ - f"--PredictCTLearnModel.load_cameradirection_model_from=" - f"{model_dir}/ctlearn_model_cameradirection.keras", - "--no-use-HDF5Merger", - ], - cwd=tmp_path, - ) == 0 + # Test with different allowed telescope lists (single and multiple telescope IDs) + allowed_tels_dict = {"single_tel_id" : [7], "multiple_tel_ids": [7, 13, 15, 16, 17, 19]} + for name, allowed_tels in allowed_tels_dict.items(): + output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{name}.dl2.h5" + # Run Prediction tool + assert run_tool( + MonoPredictCTLearnModel(), + argv = argv + [ + f"--output={output_file}", + f"--DLImageReader.allowed_tels={allowed_tels}", + ], + cwd=tmp_path + ) == 0 - - allowed_tels = [7, 13, 15, 16, 17, 19] - required_columns = [ - "telescope_pointing_azimuth", - "telescope_pointing_altitude", - "CTLearn_alt", - "CTLearn_az", - "CTLearn_prediction", - "CTLearn_energy", - ] - # Check that the output DL2 file was created - assert output_file.exists(), "Output DL2 file not created" - # Check that the created DL2 file can be read with the TableLoader - with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: - events = loader.read_telescope_events_by_id(telescopes=allowed_tels) - for tel_id in allowed_tels: - assert len(events[tel_id]) > 0 - for col in required_columns: - assert col in events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" - assert events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: + # Check telescope-wise data + tel_events = loader.read_telescope_events_by_id(telescopes=allowed_tels, dl1_parameters=True, dl2=True) + for tel_id in allowed_tels: + assert len(tel_events[tel_id]) > 0 + for col in REQUIRED_COLUMNS + [ + "tel_id", + "hillas_intensity", + "leakage_pixels_width_2", + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + ]: + assert col in tel_events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" + assert tel_events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file 
{output_file.name}" + # Check subarray-wise data + subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) + assert len(subarray_events) > 0 + for col in REQUIRED_COLUMNS: + assert col in subarray_events.colnames, f"{col} missing in DL2 file {output_file.name}" + assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py index ed661409..b32c5256 100644 --- a/ctlearn/tools/tests/test_train_model.py +++ b/ctlearn/tools/tests/test_train_model.py @@ -5,7 +5,7 @@ from ctapipe.core import run_tool from ctlearn.tools import TrainCTLearnModel -@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection", "skydirection"]) +@pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection"]) def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_path): """ Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. @@ -25,7 +25,8 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat # Output directory for trained model output_dir = tmp_path / f"ctlearn_{reco_task}" - + allowed_tels = [7, 13, 15, 16, 17, 19] + # Build command-line arguments argv = [ f"--signal={signal_dir}", @@ -35,6 +36,7 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat "--TrainCTLearnModel.n_epochs=2", "--TrainCTLearnModel.batch_size=4", "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}" ] # Include background only for classification task @@ -63,14 +65,11 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat assert "val_loss" in log_df.columns, ( f"'val_loss' column missing in training_log.csv for {reco_task}" ) - val_loss_min= 0.0 - val_loss_max= 1.5 if reco_task == "skydirection" else 1.0 - # Check val_loss values are between 0.0 and 1.0 (or 1.5 for skydirection) val_loss = log_df["val_loss"].dropna() assert not val_loss.empty, ( f"'val_loss' column is empty for {reco_task}" ) - assert ((val_loss >= val_loss_min) & (val_loss <= val_loss_max)).all(), ( - f"'val_loss' values out of range [{val_loss_min}, {val_loss_max}] for {reco_task}: " + assert ((val_loss >= 0.0) & (val_loss <= 1.0)).all(), ( + f"'val_loss' values out of range [0.0, 1.0] for {reco_task}: " f"{val_loss.tolist()}" ) \ No newline at end of file From 842174fdd9b99284dbcc70461d21c82936deebe6 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Wed, 21 Jan 2026 17:23:04 +0100 Subject: [PATCH 18/27] fixing couple of stuff uses distinct CTLearn model prefixes for each reco tasks also train a LST models in conftest polish tests to test different image mappers and check for subarray and telescope tableloader reading --- ctlearn/conftest.py | 87 +++-- ctlearn/tools/predict_model.py | 372 ++++++++++++++-------- ctlearn/tools/tests/test_predict_model.py | 65 ++-- ctlearn/tools/train_model.py | 10 +- 4 files changed, 353 insertions(+), 181 deletions(-) diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index 63226540..485104da 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -4,6 +4,7 @@ import pytest import shutil +from traitlets.config.loader import Config from ctapipe.core import run_tool from ctapipe.utils import get_dataset_path @@ -101,36 +102,60 @@ def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory # Hardcopy DL1 proton file to 
the background directory shutil.copy(dl1_proton_file, background_dir) + # Configuration to disable quality cuts to increase + # training statistics mainly needed for LSTs. + config = Config( + { + "TableQualityQuery": { + "quality_criteria": [], + }, + }, + ) + + # Define telescope types and their allowed telescopes + telescope_types = { + "LST": [1, 2], + "MST": [7, 13, 15, 16, 17, 19], + #"SST": [30, 37, 43, 44, 53], + } + image_mapper_types = { + "LST": "BilinearMapper", + "MST": "OversamplingMapper", + #"SST": "SquareMapper", + } + # Loop over telescope types and reconstruction tasks + # and train models for each combination ctlearn_trained_dl1_models = {} - for reco_task in ["type", "energy", "cameradirection"]: - # Output directory for trained model - output_dir = tmp_path / f"ctlearn_{reco_task}" - - # Build command-line arguments - allowed_tels = [7, 13, 15, 16, 17, 19] - argv = [ - f"--signal={signal_dir}", - "--pattern-signal=*.dl1.h5", - f"--output={output_dir}", - f"--reco={reco_task}", - "--TrainCTLearnModel.n_epochs=1", - "--TrainCTLearnModel.batch_size=2", - "--DLImageReader.focal_length_choice=EQUIVALENT", - f"--DLImageReader.allowed_tels={allowed_tels}", - ] - - # Include background only for classification task - if reco_task == "type": - argv.extend([ - f"--background={background_dir}", - "--pattern-background=*.dl1.h5", - "--DLImageReader.enforce_subarray_equality=False", - ]) - - # Run training - assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 - - ctlearn_trained_dl1_models[reco_task] = output_dir / "ctlearn_model.keras" - # Check that the trained model exists - assert ctlearn_trained_dl1_models[reco_task].exists() + for telescope_type, allowed_tels in telescope_types.items(): + for reco_task in ["type", "energy", "cameradirection"]: + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{telescope_type}_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=1", + "--TrainCTLearnModel.batch_size=2", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend([ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + f"--DLImageReader.image_mapper_type={image_mapper_types[telescope_type]}", + ]) + + # Run training + assert run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 + + ctlearn_trained_dl1_models[f"{telescope_type}_{reco_task}"] = output_dir / "ctlearn_model.keras" + # Check that the trained model exists + assert ctlearn_trained_dl1_models[f"{telescope_type}_{reco_task}"].exists() return ctlearn_trained_dl1_models \ No newline at end of file diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py index d0f3649e..a5d74e70 100644 --- a/ctlearn/tools/predict_model.py +++ b/ctlearn/tools/predict_model.py @@ -3,9 +3,10 @@ """ import atexit -import pathlib +import uuid +import warnings + import numpy as np -import os import tables import tensorflow as tf import keras @@ -21,6 +22,7 @@ setdiff, unique, ) +from astropy.time import Time from ctapipe.containers import ( ParticleClassificationContainer, @@ -35,12 +37,8 @@ Int, Path, flag, - Set, Dict, - List, - CaselessStrEnum, ComponentName, - Unicode, classes_with_traits, ) from 
ctapipe.monitoring.interpolation import PointingInterpolator @@ -48,10 +46,6 @@ from ctapipe.io import read_table, write_table, HDF5Merger from ctapipe.io.hdf5dataformat import ( DL0_TEL_POINTING_GROUP, - DL1_CAMERA_COEFFICIENTS_GROUP, - DL1_COLUMN_NAMES, - DL1_IMAGE_STATISTICS_TABLE, - DL1_PIXEL_STATISTICS_GROUP, DL1_SUBARRAY_GROUP, DL1_SUBARRAY_POINTING_GROUP, DL1_SUBARRAY_TRIGGER_TABLE, @@ -66,16 +60,9 @@ DL1_TEL_POINTING_GROUP, DL1_TEL_TRIGGER_TABLE, DL2_EVENT_STATISTICS_GROUP, - DL2_SUBARRAY_CROSS_CALIBRATION_GROUP, - DL2_SUBARRAY_INTER_CALIBRATION_GROUP, - DL2_TEL_GROUP, FIXED_POINTING_GROUP, - OBSERVATION_BLOCK_TABLE, R0_TEL_GROUP, R1_TEL_GROUP, - SCHEDULING_BLOCK_TABLE, - SHOWER_DISTRIBUTION_TABLE, - SIMULATION_GROUP, SIMULATION_IMAGES_GROUP, SIMULATION_IMPACT_GROUP, SIMULATION_PARAMETERS_GROUP, @@ -96,7 +83,9 @@ ProcessType, LST_EPOCH, ) +from ctlearn import __version__ as ctlearn_version from ctlearn.core.loader import DLDataLoader +from ctlearn.utils import validate_trait_dict # Convienient constants for column names and table keys CONFIG_INSTRUMENT_SUBARRAY_LAYOUT = "/configuration/instrument/subarray/layout" @@ -122,6 +111,10 @@ SIMULATION_PARAMETERS_GROUP, ] + +class CannotPredict(OSError): + """Raised when trying to predict an incompatible file""" + class PredictCTLearnModel(Tool): """ Base tool to predict the gammaness, energy and arrival direction from R1/DL1 data using CTLearn models. @@ -199,7 +192,7 @@ class PredictCTLearnModel(Tool): Transform to camera coordinate offsets w.r.t. the telescope pointing to Alt/Az coordinates. _transform_spher_coord_offsets_to_sky(table) Transform to spherical coordinate offsets w.r.t. the telescope pointing to Alt/Az coordinates. - _create_nan_table(nonexample_identifiers, columns, shapes) + _create_nan_table(nonexample_identifiers, columns, shapes, reco_task) Create a table with NaNs for missing predictions. _store_pointing(all_identifiers) Store the telescope pointing table from to the output file. @@ -255,10 +248,19 @@ class PredictCTLearnModel(Tool): ), ).tag(config=True) - prefix = Unicode( - default_value="CTLearn", + prefixes = Dict( + default_value={ + "type": "CTLearnClassifier", + "energy": "CTLearnRegressor", + "cameradirection": "CTLearnCameraReconstructor", + "skydirection": "CTLearnSkyReconstructor", + "all": "CTLearn", + }, allow_none=False, - help="Name of the reconstruction algorithm used to generate the dl2 data.", + help=( + "Name of the reconstruction algorithm used " + "to generate the dl2 data for each task." + ) ).tag(config=True) load_type_model_from = Path( @@ -288,7 +290,7 @@ class PredictCTLearnModel(Tool): load_cameradirection_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) for the regression " + "Path to a Keras model file (Keras3) for the reconstruction " "of the primary particle arrival direction based on camera coordinate offsets." ), allow_none=True, @@ -300,7 +302,7 @@ class PredictCTLearnModel(Tool): load_skydirection_model_from = Path( default_value=None, help=( - "Path to a Keras model file (Keras3) for the regression " + "Path to a Keras model file (Keras3) for the reconstruction " "of the primary particle arrival direction based on spherical coordinate offsets." 
), allow_none=True, @@ -410,9 +412,18 @@ class PredictCTLearnModel(Tool): classes = classes_with_traits(DLDataReader) def setup(self): + self.activity_start_time = Time.now() + self.log.info("ctlearn version %s", ctlearn_version) + # Validate the prefixes trait dictionary + validate_trait_dict(self.prefixes, ["type", "energy", "cameradirection", "skydirection", "all"]) # Copy selected tables from the input file to the output file self.log.info("Copying to output destination.") - with HDF5Merger(self.output_path, parent=self) as merger: + with HDF5Merger( + self.output_path, + dl2_subarray=False, + dl2_telescope=False, + parent=self + ) as merger: merger(self.input_url) # Create a MirroredStrategy. self.strategy = tf.distribute.MirroredStrategy() @@ -443,7 +454,27 @@ def setup(self): self._ensure_subarray_consistency() def finish(self): + # Overwrite CTAO reference metadata to the output file + self._overwrite_meta() self.log.info("Tool is shutting down") + + def _overwrite_meta(self): + """Overwrite CTAO metadata in the output file.""" + with warnings.catch_warnings(): + warnings.simplefilter("ignore", tables.NaturalNameWarning) + with tables.open_file(self.output_path, mode="r+") as h5_file: + # Update CTA Activity metadata + h5_file.root._v_attrs["CTA ACTIVITY ID"] = str(uuid.uuid4()) + h5_file.root._v_attrs["CTA ACTIVITY NAME"] = self.name + h5_file.root._v_attrs["CTA ACTIVITY SOFTWARE NAME"] = "ctlearn" + h5_file.root._v_attrs["CTA ACTIVITY SOFTWARE VERSION"] = ctlearn_version + h5_file.root._v_attrs["CTA ACTIVITY START TIME"] = self.activity_start_time.iso + h5_file.root._v_attrs["CTA ACTIVITY STOP TIME"] = Time.now().iso + # Update CTA Product metadata + #TODO: Overwrite CTA PRODUCT DATA LEVELS': 'DL1_IMAGES,DL1_PARAMETERS' also + h5_file.root._v_attrs["CTA PRODUCT CREATION TIME"] = self.activity_start_time.iso + h5_file.root._v_attrs["CTA PRODUCT ID"] = str(uuid.uuid4()) + h5_file.flush() def _ensure_subarray_consistency(self): """ @@ -459,7 +490,7 @@ def _ensure_subarray_consistency(self): self.input_url, focal_length_choice=self.dl1dh_reader.focal_length_choice, ) - if input_subarray.__eq__(self.dl1dh_reader.subarray): + if input_subarray == self.dl1dh_reader.subarray: return self.dl1dh_reader.subarray.to_hdf(self.output_path, overwrite=True) @@ -572,7 +603,7 @@ def prune_group(group, valid_ids): camera_indices = set(layout_table.col("camera_index")) prune_group(camera_group, camera_indices) - def _create_nan_table(self, nonexample_identifiers, columns, shapes): + def _create_nan_table(self, nonexample_identifiers, columns, shapes, reco_task): """ Create a table with NaNs for missing predictions. @@ -587,6 +618,8 @@ def _create_nan_table(self, nonexample_identifiers, columns, shapes): List of column names to create in the table. shapes : list of shapes List of shapes for the columns to create in the table. + reco_task : str + Reconstruction task name. 
Returns: -------- @@ -604,7 +637,7 @@ def _create_nan_table(self, nonexample_identifiers, columns, shapes): (len(nonexample_identifiers), len(self.dl1dh_reader.tel_ids)), dtype=bool, ), - name=f"{self.prefix}_telescopes", + name=f"{self.prefixes[reco_task]}_telescopes", ) return nan_table @@ -795,7 +828,7 @@ def _predict_particletype(self, example_identifiers): # Create prediction table and add the predicted classification score ('gammaness') particletype_table = example_identifiers.copy() particletype_table.add_column( - predict_data["type"].T[1], name=f"{self.prefix}_tel_prediction" + predict_data["type"].T[1], name=f"{self.prefixes['type']}_tel_prediction" ) return particletype_table, feature_vectors @@ -827,7 +860,7 @@ def _predict_energy(self, example_identifiers): ) # Create prediction table and add the reconstructed energy in TeV energy_table = example_identifiers.copy() - energy_table.add_column(reco_energy, name=f"{self.prefix}_tel_energy") + energy_table.add_column(reco_energy, name=f"{self.prefixes['energy']}_tel_energy") return energy_table, feature_vectors def _predict_cameradirection(self, example_identifiers): @@ -852,7 +885,7 @@ def _predict_cameradirection(self, example_identifiers): Feature vectors extracted from the backbone model. """ self.log.info( - "Predicting for the regression of the primary particle arrival direction based on camera coordinate offsets..." + "Predicting for the reconstruction of the primary particle arrival direction based on camera coordinate offsets..." ) # Predict the data using the loaded direction_model predict_data, feature_vectors = self._predict_with_model( @@ -890,7 +923,7 @@ def _predict_skydirection(self, example_identifiers): Feature vectors extracted from the backbone model. """ self.log.info( - "Predicting for the regression of the primary particle arrival direction based on spherical coordinate offsets..." + "Predicting for the reconstruction of the primary particle arrival direction based on spherical coordinate offsets..." 
)
        # Predict the data using the loaded direction_model
        predict_data, feature_vectors = self._predict_with_model(
@@ -964,8 +997,8 @@ def _transform_cam_coord_offsets_to_sky(self, table) -> Table:
         # Transform the camera coordinate offsets to Alt/Az coordinates
         reco_direction = cam_coord_offset.transform_to(altaz)
         # Add the reconstructed direction (az, alt) to the prediction table
-        table.add_column(reco_direction.az.to(u.deg), name=f"{self.prefix}_tel_az")
-        table.add_column(reco_direction.alt.to(u.deg), name=f"{self.prefix}_tel_alt")
+        table.add_column(reco_direction.az.to(u.deg), name=f"{self.prefixes['cameradirection']}_tel_az")
+        table.add_column(reco_direction.alt.to(u.deg), name=f"{self.prefixes['cameradirection']}_tel_alt")
         # Remove unnecessary columns from the table that do not match the ctapipe DL2 data format
         table.remove_columns(
             [
@@ -1028,8 +1061,8 @@ def _transform_spher_coord_offsets_to_sky(self, table) -> Table:
         # Transform the reco direction from nominal frame to the AltAz frame
         sky_coord = reco_direction.transform_to(altaz)
         # Add the reconstructed direction (az, alt) to the prediction table
-        table.add_column(sky_coord.az.to(u.deg), name=f"{self.prefix}_az")
-        table.add_column(sky_coord.alt.to(u.deg), name=f"{self.prefix}_alt")
+        table.add_column(sky_coord.az.to(u.deg), name=f"{self.prefixes['skydirection']}_az")
+        table.add_column(sky_coord.alt.to(u.deg), name=f"{self.prefixes['skydirection']}_alt")
         # Remove unnecessary columns from the table that do not match the ctapipe DL2 data format
         table.remove_columns(
             [
@@ -1168,10 +1201,10 @@ def _create_feature_vectors_table(
             )
             feature_vector_table.add_column(
                 particletype_feature_vectors,
-                name=f"{self.prefix}_tel_particletype_feature_vectors",
+                name=f"{self.prefixes['all']}_tel_particletype_feature_vectors",
             )
             if nonexample_identifiers is not None:
-                columns_list.append(f"{self.prefix}_tel_particletype_feature_vectors")
+                columns_list.append(f"{self.prefixes['all']}_tel_particletype_feature_vectors")
                 shapes_list.append(
                     (
                         len(nonexample_identifiers),
@@ -1181,10 +1214,10 @@
         if energy_feature_vectors is not None:
             is_valid_col = ~np.isnan(np.min(energy_feature_vectors, axis=1), dtype=bool)
             feature_vector_table.add_column(
-                energy_feature_vectors, name=f"{self.prefix}_tel_energy_feature_vectors"
+                energy_feature_vectors, name=f"{self.prefixes['all']}_tel_energy_feature_vectors"
             )
             if nonexample_identifiers is not None:
-                columns_list.append(f"{self.prefix}_tel_energy_feature_vectors")
+                columns_list.append(f"{self.prefixes['all']}_tel_energy_feature_vectors")
                 shapes_list.append(
                     (
                         len(nonexample_identifiers),
@@ -1200,10 +1233,10 @@
             )
             feature_vector_table.add_column(
                 direction_feature_vectors,
-                name=f"{self.prefix}_tel_geometry_feature_vectors",
+                name=f"{self.prefixes['all']}_tel_geometry_feature_vectors",
             )
             if nonexample_identifiers is not None:
-                columns_list.append(f"{self.prefix}_tel_geometry_feature_vectors")
+                columns_list.append(f"{self.prefixes['all']}_tel_geometry_feature_vectors")
                 shapes_list.append(
                     (
                         len(nonexample_identifiers),
@@ -1217,6 +1250,7 @@
                 nonexample_identifiers,
                 columns=columns_list,
                 shapes=shapes_list,
+                reco_task="all",
             )
             feature_vector_table = vstack([feature_vector_table, nan_table])
             is_valid_col = np.concatenate(
@@ -1225,7 +1259,7 @@
         # Add is_valid column to the feature vector table
         feature_vector_table.add_column(
             is_valid_col,
name=f"{self.prefixes['all']}_tel_is_valid", ) return feature_vector_table @@ -1330,23 +1364,24 @@ def start(self): if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_prediction"], + columns=[f"{self.prefixes['type']}_tel_prediction"], shapes=[(len(nonexample_identifiers),)], + reco_task="type", ) particletype_table = vstack([particletype_table, nan_table]) # Add is_valid column to the particle type table particletype_table.add_column( ~np.isnan( - particletype_table[f"{self.prefix}_tel_prediction"].data, + particletype_table[f"{self.prefixes['type']}_tel_prediction"].data, dtype=bool, ), - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['type']}_tel_is_valid", ) # Add the default values and meta data to the table add_defaults_and_meta( particletype_table, ParticleClassificationContainer, - prefix=self.prefix, + prefix=self.prefixes['type'], add_tel_prefix=True, ) if self.dl2_telescope: @@ -1361,12 +1396,12 @@ def start(self): write_table( particletype_tel_table, self.output_path, - f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefixes['type']}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefixes['type']}/tel_{tel_id:03d}", ) if self.dl2_subarray: @@ -1392,15 +1427,15 @@ def start(self): [ [val] for val in particletype_subarray_table[ - f"{self.prefix}_is_valid" + f"{self.prefixes['type']}_is_valid" ] ], - name=f"{self.prefix}_telescopes", + name=f"{self.prefixes['type']}_telescopes", ) else: self.type_stereo_combiner = StereoCombiner.from_name( self.stereo_combiner_cls, - prefix=self.prefix, + prefix=self.prefixes['type'], property=ReconstructionProperty.PARTICLE_TYPE, parent=self, ) @@ -1411,7 +1446,7 @@ def start(self): # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - particletype_subarray_table[f"{self.prefix}_telescopes"].dtype + particletype_subarray_table[f"{self.prefixes['type']}_telescopes"].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination @@ -1424,7 +1459,7 @@ def start(self): ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - particletype_subarray_table[f"{self.prefix}_telescopes"] + particletype_subarray_table[f"{self.prefixes['type']}_telescopes"] ): if not tel_id_mask: continue @@ -1435,14 +1470,14 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix length - particletype_subarray_table[f"{self.prefix}_telescopes"] = ( + particletype_subarray_table[f"{self.prefixes['type']}_telescopes"] = ( reco_telescopes ) # Deduplicate the subarray particletype table to have only one entry per event particletype_subarray_table = super().deduplicate_first_valid( table=particletype_subarray_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", + valid_col=f"{self.prefixes['type']}_is_valid", ) # Sort the subarray particletype table particletype_subarray_table.sort(SUBARRAY_EVENT_KEYS) @@ -1450,13 +1485,25 @@ def start(self): write_table( particletype_subarray_table, self.output_path, - f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", ) self.log.info( 
"DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", ) + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['type']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['type']}", + ) energy_feature_vectors = None if self.load_energy_model_from is not None: # Predict the energy of the primary particle @@ -1467,22 +1514,23 @@ def start(self): if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_energy"], + columns=[f"{self.prefixes['energy']}_tel_energy"], shapes=[(len(nonexample_identifiers),)], + reco_task="energy", ) energy_table = vstack([energy_table, nan_table]) # Add is_valid column to the energy table energy_table.add_column( ~np.isnan( - energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool + energy_table[f"{self.prefixes['energy']}_tel_energy"].data, dtype=bool ), - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['energy']}_tel_is_valid", ) # Add the default values and meta data to the table add_defaults_and_meta( energy_table, ReconstructedEnergyContainer, - prefix=self.prefix, + prefix=self.prefixes['energy'], add_tel_prefix=True, ) if self.dl2_telescope: @@ -1497,12 +1545,12 @@ def start(self): write_table( energy_tel_table, self.output_path, - f"{DL2_TEL_ENERGY_GROUP}/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_ENERGY_GROUP}/{self.prefixes['energy']}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TEL_ENERGY_GROUP}/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_ENERGY_GROUP}/{self.prefixes['energy']}/tel_{tel_id:03d}", ) if self.dl2_subarray: self.log.info( @@ -1525,14 +1573,14 @@ def start(self): energy_subarray_table.add_column( [ [val] - for val in energy_subarray_table[f"{self.prefix}_is_valid"] + for val in energy_subarray_table[f"{self.prefixes['energy']}_is_valid"] ], - name=f"{self.prefix}_telescopes", + name=f"{self.prefixes['energy']}_telescopes", ) else: self.energy_stereo_combiner = StereoCombiner.from_name( self.stereo_combiner_cls, - prefix=self.prefix, + prefix=self.prefixes['energy'], property=ReconstructionProperty.ENERGY, parent=self, ) @@ -1543,7 +1591,7 @@ def start(self): # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - energy_subarray_table[f"{self.prefix}_telescopes"].dtype + energy_subarray_table[f"{self.prefixes['energy']}_telescopes"].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination @@ -1556,7 +1604,7 @@ def start(self): ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - energy_subarray_table[f"{self.prefix}_telescopes"] + energy_subarray_table[f"{self.prefixes['energy']}_telescopes"] ): if not tel_id_mask: continue @@ -1567,14 +1615,14 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix length - energy_subarray_table[f"{self.prefix}_telescopes"] = ( + energy_subarray_table[f"{self.prefixes['energy']}_telescopes"] = ( reco_telescopes ) # Deduplicate 
the subarray energy table to have only one entry per event energy_subarray_table = super().deduplicate_first_valid( table=energy_subarray_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", + valid_col=f"{self.prefixes['energy']}_is_valid", ) # Sort the subarray energy table energy_subarray_table.sort(SUBARRAY_EVENT_KEYS) @@ -1582,13 +1630,25 @@ def start(self): write_table( energy_subarray_table, self.output_path, - f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['energy']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['energy']}", + ) direction_feature_vectors = None if self.load_cameradirection_model_from is not None: # Join the prediction table with the telescope pointing table @@ -1619,27 +1679,28 @@ def start(self): if len(nonexample_identifiers_tel) > 0: nan_table = super()._create_nan_table( nonexample_identifiers_tel, - columns=[f"{self.prefix}_tel_alt", f"{self.prefix}_tel_az"], + columns=[f"{self.prefixes['cameradirection']}_tel_alt", f"{self.prefixes['cameradirection']}_tel_az"], shapes=[ (len(nonexample_identifiers_tel),), (len(nonexample_identifiers_tel),), ], + reco_task="cameradirection", ) direction_tel_table = vstack([direction_tel_table, nan_table]) direction_tel_table.sort(TEL_EVENT_KEYS) # Add is_valid column to the direction table direction_tel_table.add_column( ~np.isnan( - direction_tel_table[f"{self.prefix}_tel_alt"].data, + direction_tel_table[f"{self.prefixes['cameradirection']}_tel_alt"].data, dtype=bool, ), - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['cameradirection']}_tel_is_valid", ) # Add the default values and meta data to the table add_defaults_and_meta( direction_tel_table, ReconstructedGeometryContainer, - prefix=self.prefix, + prefix=self.prefixes['cameradirection'], add_tel_prefix=True, ) direction_tel_tables.append(direction_tel_table) @@ -1648,12 +1709,12 @@ def start(self): write_table( direction_tel_table, self.output_path, - f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}/tel_{tel_id:03d}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefix}/tel_{tel_id:03d}", + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}/tel_{tel_id:03d}", ) if self.dl2_subarray: self.log.info( @@ -1682,15 +1743,15 @@ def start(self): [ [val] for val in direction_subarray_table[ - f"{self.prefix}_is_valid" + f"{self.prefixes['cameradirection']}_is_valid" ] ], - name=f"{self.prefix}_telescopes", + name=f"{self.prefixes['cameradirection']}_telescopes", ) else: self.geometry_stereo_combiner = StereoCombiner.from_name( self.stereo_combiner_cls, - prefix=self.prefix, + prefix=self.prefixes['cameradirection'], property=ReconstructionProperty.GEOMETRY, parent=self, ) @@ -1703,7 +1764,7 @@ def start(self): # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has 
to be converted to a boolean mask
                    if (
-                        direction_subarray_table[f"{self.prefix}_telescopes"].dtype
+                        direction_subarray_table[f"{self.prefixes['cameradirection']}_telescopes"].dtype
                        != np.bool_
                    ):
                        # Create boolean mask for telescopes that participate in the stereo reconstruction combination
@@ -1716,7 +1777,7 @@
                        )
                        # Loop over the table and set the boolean mask for the telescopes
                        for index, tel_id_mask in enumerate(
-                            direction_subarray_table[f"{self.prefix}_telescopes"]
+                            direction_subarray_table[f"{self.prefixes['cameradirection']}_telescopes"]
                        ):
                            if not tel_id_mask:
                                continue
@@ -1727,14 +1788,14 @@
                            )
                        ] = True
                        # Overwrite the column with the boolean mask with fixed length
-                        direction_subarray_table[f"{self.prefix}_telescopes"] = (
+                        direction_subarray_table[f"{self.prefixes['cameradirection']}_telescopes"] = (
                            reco_telescopes
                        )
                # Deduplicate the subarray direction table to have only one entry per event
                direction_subarray_table = super().deduplicate_first_valid(
                    table=direction_subarray_table,
                    keys=SUBARRAY_EVENT_KEYS,
-                    valid_col=f"{self.prefix}_is_valid",
+                    valid_col=f"{self.prefixes['cameradirection']}_is_valid",
                )
                # Sort the subarray geometry table
                direction_subarray_table.sort(SUBARRAY_EVENT_KEYS)
@@ -1742,13 +1803,25 @@
                write_table(
                    direction_subarray_table,
                    self.output_path,
-                    f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}",
+                    f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}",
                )
                self.log.info(
                    "DL2 prediction data was stored in '%s' under '%s'",
                    self.output_path,
-                    f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}",
+                    f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}",
                )
+                # Store the telescope event statistics table
+                write_table(
+                    self.dl1dh_reader.quality_query.to_table(functions=True),
+                    self.output_path,
+                    f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['cameradirection']}",
+                    append=True,
+                )
+                self.log.info(
+                    "DL2 service telescope event statistics data was stored in '%s' under '%s'",
+                    self.output_path,
+                    f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['cameradirection']}",
+                )
        # Create the feature vector table if the DL1 features are enabled
        if self.dl1_features:
            self.log.info("Processing and storing dl1 feature vectors...")
@@ -1773,12 +1846,12 @@
                write_table(
                    feature_vectors_tel_table,
                    self.output_path,
-                    f"{DL1_TEL_GROUP}/features/{self.prefix}/tel_{tel_id:03d}",
+                    f"{DL1_TEL_GROUP}/features/{self.prefixes['all']}/tel_{tel_id:03d}",
                )
                self.log.info(
                    "DL1 feature vectors were stored in '%s' under '%s'",
                    self.output_path,
-                    f"{DL1_TEL_GROUP}/features/{self.prefix}/tel_{tel_id:03d}",
+                    f"{DL1_TEL_GROUP}/features/{self.prefixes['all']}/tel_{tel_id:03d}",
                )

    def _store_mc_telescope_pointing(self, all_identifiers):
@@ -1902,7 +1975,7 @@ def start(self):
            survival_telescopes.append(survival_mask)
        # Add the survival telescopes to the example_identifiers
        example_identifiers.add_column(
-            survival_telescopes, name=f"{self.prefix}_telescopes"
+            survival_telescopes, name=f"{self.prefixes['all']}_telescopes"
        )
        # Pointing table for the stereo mode for MC simulation
        if self.dl1dh_reader.process_type == ProcessType.Simulation:
@@ -1924,50 +1997,66 @@
            if len(nonexample_identifiers) > 0:
                nan_table = super()._create_nan_table(
                    nonexample_identifiers,
-                    columns=[f"{self.prefix}_tel_prediction"],
+                    columns=[f"{self.prefixes['type']}_tel_prediction"],
                    shapes=[(len(nonexample_identifiers),)],
+                    reco_task="type",
                )
                particletype_table = vstack([particletype_table, nan_table])
            # Add
is_valid column to the particletype table particletype_table.add_column( ~np.isnan( - particletype_table[f"{self.prefix}_tel_prediction"].data, + particletype_table[f"{self.prefixes['type']}_tel_prediction"].data, dtype=bool, ), - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['type']}_tel_is_valid", ) # Rename the columns for the stereo mode particletype_table.rename_column( - f"{self.prefix}_tel_prediction", f"{self.prefix}_prediction" + f"{self.prefixes['type']}_tel_prediction", f"{self.prefixes['type']}_prediction" ) particletype_table.rename_column( - f"{self.prefix}_tel_is_valid", f"{self.prefix}_is_valid" + f"{self.prefixes['type']}_tel_is_valid", f"{self.prefixes['type']}_is_valid" + ) + particletype_table.rename_column( + f"{self.prefixes['all']}_telescopes", f"{self.prefixes['type']}_telescopes" ) # Deduplicate the subarray particletype table to have only one entry per event particletype_table = super().deduplicate_first_valid( table=particletype_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", + valid_col=f"{self.prefixes['type']}_is_valid", ) particletype_table.sort(SUBARRAY_EVENT_KEYS) # Add the default values and meta data to the table add_defaults_and_meta( particletype_table, ParticleClassificationContainer, - prefix=self.prefix, + prefix=self.prefixes['type'], ) # Save the prediction to the output file write_table( particletype_table, self.output_path, - f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", overwrite=self.overwrite_tables, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", ) + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['type']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['type']}", + ) energy_feature_vectors = None if self.load_energy_model_from is not None: # Predict the energy of the primary particle @@ -1979,48 +2068,64 @@ def start(self): if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_energy"], + columns=[f"{self.prefixes['energy']}_tel_energy"], shapes=[(len(nonexample_identifiers),)], + reco_task="energy", ) energy_table = vstack([energy_table, nan_table]) # Add is_valid column to the energy table energy_table.add_column( ~np.isnan( - energy_table[f"{self.prefix}_tel_energy"].data, dtype=bool + energy_table[f"{self.prefixes['energy']}_tel_energy"].data, dtype=bool ), - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['energy']}_tel_is_valid", ) # Rename the columns for the stereo mode energy_table.rename_column( - f"{self.prefix}_tel_energy", f"{self.prefix}_energy" + f"{self.prefixes['energy']}_tel_energy", f"{self.prefixes['energy']}_energy" + ) + energy_table.rename_column( + f"{self.prefixes['energy']}_tel_is_valid", f"{self.prefixes['energy']}_is_valid" ) energy_table.rename_column( - f"{self.prefix}_tel_is_valid", f"{self.prefix}_is_valid" + f"{self.prefixes['all']}_telescopes", f"{self.prefixes['energy']}_telescopes" ) # Deduplicate the subarray energy table to have only one entry per event energy_table = 
super().deduplicate_first_valid( table=energy_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", + valid_col=f"{self.prefixes['energy']}_is_valid", ) energy_table.sort(SUBARRAY_EVENT_KEYS) # Add the default values and meta data to the table add_defaults_and_meta( energy_table, ReconstructedEnergyContainer, - prefix=self.prefix, + prefix=self.prefixes['energy'], ) # Save the prediction to the output file write_table( energy_table, self.output_path, - f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['energy']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['energy']}", + ) direction_feature_vectors = None if self.load_skydirection_model_from is not None: # Join the prediction table with the telescope pointing table @@ -2042,43 +2147,58 @@ def start(self): if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_alt", f"{self.prefix}_az"], + columns=[f"{self.prefixes['skydirection']}_alt", f"{self.prefixes['skydirection']}_az"], shapes=[ (len(nonexample_identifiers),), (len(nonexample_identifiers),), ], + reco_task="skydirection", ) direction_table = vstack([direction_table, nan_table]) # Add is_valid column to the direction table direction_table.add_column( - ~np.isnan(direction_table[f"{self.prefix}_alt"].data, dtype=bool), - name=f"{self.prefix}_is_valid", + ~np.isnan(direction_table[f"{self.prefixes['skydirection']}_alt"].data, dtype=bool), + name=f"{self.prefixes['skydirection']}_is_valid", + ) + direction_table.rename_column( + f"{self.prefixes['all']}_telescopes", f"{self.prefixes['skydirection']}_telescopes" ) # Deduplicate the subarray direction table to have only one entry per event direction_table = super().deduplicate_first_valid( table=direction_table, keys=SUBARRAY_EVENT_KEYS, - valid_col=f"{self.prefix}_is_valid", + valid_col=f"{self.prefixes['skydirection']}_is_valid", ) direction_table.sort(SUBARRAY_EVENT_KEYS) # Add the default values and meta data to the table add_defaults_and_meta( direction_table, ReconstructedGeometryContainer, - prefix=self.prefix, + prefix=self.prefixes['skydirection'], ) # Save the prediction to the output file write_table( direction_table, self.output_path, - f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['skydirection']}", ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['skydirection']}", ) - + # Store the telescope event statistics table + write_table( + self.dl1dh_reader.quality_query.to_table(functions=True), + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['skydirection']}", + append=True, + ) + self.log.info( + "DL2 service telescope event statistics data was stored in '%s' under '%s'", + self.output_path, + f"{DL2_EVENT_STATISTICS_GROUP}/{self.prefixes['skydirection']}", + ) # 
Create the feature vector table if the DL1 features are enabled
        if self.dl1_features:
            self.log.info("Processing and storing dl1 feature vectors...")
@@ -2091,34 +2211,34 @@ def start(self):
            )
            # Loop over the selected telescopes and store the feature vectors
            # for each telescope in the output file. The feature vectors are stored
-            # in the DL1_TEL_GROUP/features/{prefix}/tel_{tel_id:03d} table.
+            # in the DL1_TEL_GROUP/features/{self.prefixes['all']}/tel_{tel_id:03d} table.
            # Rename the columns for the stereo mode
            feature_vector_table.rename_column(
-                f"{self.prefix}_tel_particletype_feature_vectors",
-                f"{self.prefix}_particletype_feature_vectors",
+                f"{self.prefixes['all']}_tel_particletype_feature_vectors",
+                f"{self.prefixes['all']}_particletype_feature_vectors",
            )
            feature_vector_table.rename_column(
-                f"{self.prefix}_tel_energy_feature_vectors",
-                f"{self.prefix}_energy_feature_vectors",
+                f"{self.prefixes['all']}_tel_energy_feature_vectors",
+                f"{self.prefixes['all']}_energy_feature_vectors",
            )
            feature_vector_table.rename_column(
-                f"{self.prefix}_tel_geometry_feature_vectors",
-                f"{self.prefix}_geometry_feature_vectors",
+                f"{self.prefixes['all']}_tel_geometry_feature_vectors",
+                f"{self.prefixes['all']}_geometry_feature_vectors",
            )
            feature_vector_table.rename_column(
-                f"{self.prefix}_tel_is_valid", f"{self.prefix}_is_valid"
+                f"{self.prefixes['all']}_tel_is_valid", f"{self.prefixes['all']}_is_valid"
            )
            feature_vector_table.sort(SUBARRAY_EVENT_KEYS)
            # Save the prediction to the output file
            write_table(
                feature_vector_table,
                self.output_path,
-                f"{DL1_SUBARRAY_GROUP}/features/{self.prefix}",
+                f"{DL1_SUBARRAY_GROUP}/features/{self.prefixes['all']}",
            )
            self.log.info(
                "DL1 feature vectors were stored in '%s' under '%s'",
                self.output_path,
-                f"{DL1_SUBARRAY_GROUP}/features/{self.prefix}",
+                f"{DL1_SUBARRAY_GROUP}/features/{self.prefixes['all']}",
            )

    def _store_mc_subarray_pointing(self, all_identifiers):
diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py
index 23f32352..089328f9 100644
--- a/ctlearn/tools/tests/test_predict_model.py
+++ b/ctlearn/tools/tests/test_predict_model.py
@@ -10,16 +10,27 @@
 REQUIRED_COLUMNS = [
     "event_id",
     "obs_id",
-    "CTLearn_alt",
+    "CTLearnCameraReconstructor_tel_alt",
+    "CTLearnCameraReconstructor_alt",
     "true_alt",
-    "CTLearn_az",
+    "CTLearnCameraReconstructor_tel_az",
+    "CTLearnCameraReconstructor_az",
     "true_az",
-    "CTLearn_prediction",
+    "CTLearnClassifier_tel_prediction",
+    "CTLearnClassifier_prediction",
     "true_shower_primary_id",
-    "CTLearn_energy",
+    "CTLearnRegressor_tel_energy",
+    "CTLearnRegressor_energy",
     "true_energy",
-    "CTLearn_is_valid",
-    "CTLearn_telescopes",
+    "CTLearnCameraReconstructor_tel_is_valid",
+    "CTLearnCameraReconstructor_is_valid",
+    "CTLearnClassifier_tel_is_valid",
+    "CTLearnClassifier_is_valid",
+    "CTLearnRegressor_tel_is_valid",
+    "CTLearnRegressor_is_valid",
+    "CTLearnCameraReconstructor_telescopes",
+    "CTLearnClassifier_telescopes",
+    "CTLearnRegressor_telescopes",
     "tels_with_trigger"
 ]

@@ -36,35 +47,45 @@ def test_predict_model(tmp_path, ctlearn_trained_dl1_models, dl1_gamma_file, dl2
     dl2_dir = tmp_path / "dl2_output"
     dl2_dir.mkdir(parents=True, exist_ok=True)
-
+    # Define telescope types and their allowed telescopes
+    telescope_types = {
+        "LST": [2],
+        "MST": [7, 13, 15, 16, 17, 19],
+        #"SST": [30, 37, 43, 44, 53],
+    }
+    image_mapper_types = {
+        "LST": "BilinearMapper",
+        "MST": "OversamplingMapper",
+        #"SST": "SquareMapper",
+    }
     # Hardcopy the trained models to the model
directory - for reco_task in ["type", "energy", "cameradirection"]: - shutil.copy(ctlearn_trained_dl1_models[f"{reco_task}"], model_dir / f"ctlearn_model_{reco_task}.keras") - model_file = model_dir / f"ctlearn_model_{reco_task}.keras" - assert model_file.exists(), f"Trained model file not found for {reco_task}" - + for telescope_type in telescope_types.keys(): + for reco_task in ["type", "energy", "cameradirection"]: + key = f"{telescope_type}_{reco_task}" + shutil.copy(ctlearn_trained_dl1_models[key], model_dir / f"ctlearn_model_{key}.keras") + model_file = model_dir / f"ctlearn_model_{key}.keras" + assert model_file.exists(), f"Trained model file not found for {key}" # Build command-line arguments argv = [ f"--input_url={dl1_gamma_file}", - "--PredictCTLearnModel.batch_size=4", + "--PredictCTLearnModel.batch_size=2", "--DLImageReader.focal_length_choice=EQUIVALENT", - f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_type.keras", - f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_energy.keras", - f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_model_cameradirection.keras", "--no-dl1-images", "--no-true-images", f"--{dl2_tel_flag}", ] - # Test with different allowed telescope lists (single and multiple telescope IDs) - allowed_tels_dict = {"single_tel_id" : [7], "multiple_tel_ids": [7, 13, 15, 16, 17, 19]} - for name, allowed_tels in allowed_tels_dict.items(): - output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{name}.dl2.h5" + for telescope_type, allowed_tels in telescope_types.items(): + output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{telescope_type}.dl2.h5" # Run Prediction tool assert run_tool( MonoPredictCTLearnModel(), argv = argv + [ f"--output={output_file}", f"--DLImageReader.allowed_tels={allowed_tels}", + f"--DLImageReader.image_mapper_type={image_mapper_types[telescope_type]}", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_model_{telescope_type}_cameradirection.keras", ], cwd=tmp_path ) == 0 @@ -84,11 +105,15 @@ def test_predict_model(tmp_path, ctlearn_trained_dl1_models, dl1_gamma_file, dl2 "telescope_pointing_azimuth", "telescope_pointing_altitude", ]: + if dl2_tel_flag == "no-dl2-telescope" and "_tel_" in col: + continue assert col in tel_events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" assert tel_events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" # Check subarray-wise data subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) assert len(subarray_events) > 0 for col in REQUIRED_COLUMNS: + if "_tel_" in col: + continue assert col in subarray_events.colnames, f"{col} missing in DL2 file {output_file.name}" assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py index 4c440350..a55b524b 100644 --- a/ctlearn/tools/train_model.py +++ b/ctlearn/tools/train_model.py @@ -24,6 +24,7 @@ Unicode, ) from dl1_data_handler.reader import DLDataReader +from ctlearn import __version__ as ctlearn_version from ctlearn.core.loader import DLDataLoader from ctlearn.core.model import CTLearnModel from ctlearn.utils import validate_trait_dict @@ -154,10 +155,10 @@ 
class TrainCTLearnModel(Tool): allow_none=False, help=( "List of reconstruction tasks to perform. " - "'type': classification of the primary particle type " - "'energy': regression of the primary particle energy " - "'cameradirection': regression of the primary particle arrival direction in camera coordinates " - "'skydirection': regression of the primary particle arrival direction in sky coordinates" + "'type': classification of the primary particle type; " + "'energy': regression of the primary particle energy; " + "'cameradirection': reconstruction of the primary particle arrival direction in camera coordinates; " + "'skydirection': reconstruction of the primary particle arrival direction in sky coordinates." ) ).tag(config=True) @@ -240,6 +241,7 @@ class TrainCTLearnModel(Tool): classes = classes_with_traits(CTLearnModel) + classes_with_traits(DLDataReader) def setup(self): + self.log.info("ctlearn version %s", ctlearn_version) # Check if the output directory exists if self.output_dir.exists(): raise ToolConfigurationError( From 78b0f22d47481266de6f2e46a705b733d99fae63 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 22 Jan 2026 11:21:15 +0100 Subject: [PATCH 19/27] added unittests for R1 waveforms to DL2 --- ctlearn/conftest.py | 100 ++++++++++++++++++++-- ctlearn/tools/tests/test_predict_model.py | 92 +++++++++++++++++--- 2 files changed, 177 insertions(+), 15 deletions(-) diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index 485104da..34d3b56d 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -73,17 +73,107 @@ def r1_gamma_file(r1_tmp_path, gamma_simtel_path): output = r1_tmp_path / "gamma.r1.h5" + allowed_tels = [1,2] argv = [ f"--input={gamma_simtel_path}", f"--output={output}", f"--DataWriter.write_r1_waveforms=True", "--SimTelEventSource.focal_length_choice=EQUIVALENT", + f"--SimTelEventSource.allowed_tels={allowed_tels}", ] assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 return output @pytest.fixture(scope="session") -def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory): +def r1_proton_file(r1_tmp_path, proton_simtel_path): + """ + R1 file containing both waveforms and parameters from a proton simulation set. + """ + from ctapipe.tools.process import ProcessorTool + + # Restrict to two LSTs for R1 tests to reduce computational load + allowed_tels = [1,2] + output = r1_tmp_path / "proton.r1.h5" + argv = [ + f"--input={proton_simtel_path}", + f"--output={output}", + f"--DataWriter.write_r1_waveforms=True", + "--SimTelEventSource.focal_length_choice=EQUIVALENT", + f"--SimTelEventSource.allowed_tels={allowed_tels}", + + ] + assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 + return output + +@pytest.fixture(scope="session") +def ctlearn_trained_r1_mono_models(r1_gamma_file, r1_proton_file, tmp_path_factory): + """ + Test training CTLearn model using the R1 gamma and proton files for all reconstruction tasks. + Each test run gets its own isolated temp directories. 
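+    Returns a dict mapping '<telescope_type>_<reco_task>' keys to the paths of
+    the trained ``ctlearn_model.keras`` files.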
+ """ + tmp_path = tmp_path_factory.mktemp("ctlearn_models") + + # Temporary directories for signal and background + signal_dir = tmp_path / "gamma_r1" + signal_dir.mkdir(parents=True, exist_ok=True) + + background_dir = tmp_path / "proton_r1" + background_dir.mkdir(parents=True, exist_ok=True) + + # Hardcopy R1 gamma file to the signal directory + shutil.copy(r1_gamma_file, signal_dir) + # Hardcopy R1 proton file to the background directory + shutil.copy(r1_proton_file, background_dir) + + # Configuration to disable quality cuts to increase + # training statistics mainly needed for LSTs. + config = Config( + { + "TableQualityQuery": { + "quality_criteria": [], + }, + }, + ) + + # Restrict to two LSTs for R1 tests to reduce computational load + telescope_type = "LST" + # Loop over reconstruction tasks and train models for each combination + ctlearn_trained_r1_mono_models = {} + for reco_task in ["type", "energy", "cameradirection"]: + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{telescope_type}_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.r1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=1", + "--TrainCTLearnModel.batch_size=2", + "--TrainCTLearnModel.dl1dh_reader_type=DLWaveformReader", + "--DLWaveformReader.sequence_length=5", + "--DLWaveformReader.focal_length_choice=EQUIVALENT", + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend([ + f"--background={background_dir}", + "--pattern-background=*.r1.h5", + "--DLWaveformReader.enforce_subarray_equality=False", + ]) + + # Run training + assert run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 + + ctlearn_trained_r1_mono_models[f"{telescope_type}_{reco_task}"] = output_dir / "ctlearn_model.keras" + # Check that the trained model exists + assert ctlearn_trained_r1_mono_models[f"{telescope_type}_{reco_task}"].exists() + return ctlearn_trained_r1_mono_models + +@pytest.fixture(scope="session") +def ctlearn_trained_dl1_mono_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory): """ Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. Each test run gets its own isolated temp directories. 
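    Returns a dict mapping '<telescope_type>_<reco_task>' keys to the paths of
    the trained ``ctlearn_model.keras`` files.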
@@ -125,7 +215,7 @@ def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory } # Loop over telescope types and reconstruction tasks # and train models for each combination - ctlearn_trained_dl1_models = {} + ctlearn_trained_dl1_mono_models = {} for telescope_type, allowed_tels in telescope_types.items(): for reco_task in ["type", "energy", "cameradirection"]: # Output directory for trained model @@ -155,7 +245,7 @@ def ctlearn_trained_dl1_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory # Run training assert run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 - ctlearn_trained_dl1_models[f"{telescope_type}_{reco_task}"] = output_dir / "ctlearn_model.keras" + ctlearn_trained_dl1_mono_models[f"{telescope_type}_{reco_task}"] = output_dir / "ctlearn_model.keras" # Check that the trained model exists - assert ctlearn_trained_dl1_models[f"{telescope_type}_{reco_task}"].exists() - return ctlearn_trained_dl1_models \ No newline at end of file + assert ctlearn_trained_dl1_mono_models[f"{telescope_type}_{reco_task}"].exists() + return ctlearn_trained_dl1_mono_models \ No newline at end of file diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py index 089328f9..53ce6083 100644 --- a/ctlearn/tools/tests/test_predict_model.py +++ b/ctlearn/tools/tests/test_predict_model.py @@ -35,11 +35,83 @@ ] +@pytest.mark.verifies_usecase("DPPS-UC-130-1.2") +def test_predict_mono_model_with_r1_waveforms(tmp_path, ctlearn_trained_r1_mono_models, r1_gamma_file): + """ + Test training CTLearn mono model using the R1 gamma and proton files for all reconstruction tasks + and predicting DL2 from R1 waveforms. Each test run gets its own isolated temp directories. + """ + + model_dir = tmp_path / "trained_models" + model_dir.mkdir(parents=True, exist_ok=True) + + dl2_dir = tmp_path / "dl2_output" + dl2_dir.mkdir(parents=True, exist_ok=True) + # Define telescope types and their available telescopes + telescope_type = "LST" + available_tels = [1,2] + + # Hardcopy the trained models to the model directory + for reco_task in ["type", "energy", "cameradirection"]: + key = f"{telescope_type}_{reco_task}" + shutil.copy(ctlearn_trained_r1_mono_models[key], model_dir / f"ctlearn_mono_model_{key}.keras") + model_file = model_dir / f"ctlearn_mono_model_{key}.keras" + assert model_file.exists(), f"Trained mono model file not found for {key}" + # Build command-line arguments + argv = [ + f"--input_url={r1_gamma_file}", + "--PredictCTLearnModel.batch_size=2", + "--PredictCTLearnModel.dl1dh_reader_type=DLWaveformReader", + "--DLWaveformReader.sequence_length=5", + "--DLWaveformReader.focal_length_choice=EQUIVALENT", + "--no-r1-waveforms", + "--dl2-telescope", + ] + output_file = dl2_dir / f"gamma_{telescope_type}_from_waveforms.dl2.h5" + # Run Prediction tool + assert run_tool( + MonoPredictCTLearnModel(), + argv = argv + [ + f"--output={output_file}", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras", + ], + cwd=tmp_path + ) == 0 + + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader(output_file, 
pointing=True, focal_length_choice="EQUIVALENT") as loader: + # Check telescope-wise data + tel_events = loader.read_telescope_events_by_id(telescopes=available_tels, dl1_parameters=True, dl2=True) + for tel_id in available_tels: + assert len(tel_events[tel_id]) > 0 + for col in REQUIRED_COLUMNS + [ + "tel_id", + "hillas_intensity", + "leakage_pixels_width_2", + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + ]: + assert col in tel_events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" + assert tel_events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + # Check subarray-wise data + subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) + assert len(subarray_events) > 0 + for col in REQUIRED_COLUMNS: + if "_tel_" in col: + continue + assert col in subarray_events.colnames, f"{col} missing in DL2 file {output_file.name}" + assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + +@pytest.mark.verifies_usecase("DPPS-UC-130-1.2.2") @pytest.mark.parametrize("dl2_tel_flag", ["dl2-telescope", "no-dl2-telescope"]) -def test_predict_model(tmp_path, ctlearn_trained_dl1_models, dl1_gamma_file, dl2_tel_flag): +def test_predict_mono_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_mono_models, dl1_gamma_file, dl2_tel_flag): """ - Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks. - Each test run gets its own isolated temp directories. + Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks + and predicting DL2 from DL1 images. Each test run gets its own isolated temp directories. """ model_dir = tmp_path / "trained_models" @@ -62,9 +134,9 @@ def test_predict_model(tmp_path, ctlearn_trained_dl1_models, dl1_gamma_file, dl2 for telescope_type in telescope_types.keys(): for reco_task in ["type", "energy", "cameradirection"]: key = f"{telescope_type}_{reco_task}" - shutil.copy(ctlearn_trained_dl1_models[key], model_dir / f"ctlearn_model_{key}.keras") - model_file = model_dir / f"ctlearn_model_{key}.keras" - assert model_file.exists(), f"Trained model file not found for {key}" + shutil.copy(ctlearn_trained_dl1_mono_models[key], model_dir / f"ctlearn_mono_model_{key}.keras") + model_file = model_dir / f"ctlearn_mono_model_{key}.keras" + assert model_file.exists(), f"Trained mono model file not found for {key}" # Build command-line arguments argv = [ f"--input_url={dl1_gamma_file}", @@ -75,7 +147,7 @@ def test_predict_model(tmp_path, ctlearn_trained_dl1_models, dl1_gamma_file, dl2 f"--{dl2_tel_flag}", ] for telescope_type, allowed_tels in telescope_types.items(): - output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{telescope_type}.dl2.h5" + output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{telescope_type}_from_images.dl2.h5" # Run Prediction tool assert run_tool( MonoPredictCTLearnModel(), @@ -83,9 +155,9 @@ def test_predict_model(tmp_path, ctlearn_trained_dl1_models, dl1_gamma_file, dl2 f"--output={output_file}", f"--DLImageReader.allowed_tels={allowed_tels}", f"--DLImageReader.image_mapper_type={image_mapper_types[telescope_type]}", - f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_model_{telescope_type}_type.keras", - f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_model_{telescope_type}_energy.keras", - f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_model_{telescope_type}_cameradirection.keras", + 
f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras", ], cwd=tmp_path ) == 0 From 9660ca50df9221f4015d99270abdaa796e7a7f19 Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 22 Jan 2026 11:52:42 +0100 Subject: [PATCH 20/27] overwrite DataLevels in metadata --- ctlearn/tools/predict_model.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py index a5d74e70..57b75937 100644 --- a/ctlearn/tools/predict_model.py +++ b/ctlearn/tools/predict_model.py @@ -44,6 +44,7 @@ from ctapipe.monitoring.interpolation import PointingInterpolator from ctapipe.instrument import SubarrayDescription from ctapipe.io import read_table, write_table, HDF5Merger +from ctapipe.io.datalevels import DataLevel from ctapipe.io.hdf5dataformat import ( DL0_TEL_POINTING_GROUP, DL1_SUBARRAY_GROUP, @@ -71,6 +72,7 @@ DL2_TEL_PARTICLETYPE_GROUP, DL2_TEL_ENERGY_GROUP, DL2_TEL_GEOMETRY_GROUP, + DL2_SUBARRAY_GROUP, DL2_SUBARRAY_PARTICLETYPE_GROUP, DL2_SUBARRAY_ENERGY_GROUP, DL2_SUBARRAY_GEOMETRY_GROUP, @@ -110,6 +112,14 @@ SIMULATION_IMPACT_GROUP, SIMULATION_PARAMETERS_GROUP, ] +DATALEVEL_TO_GROUP = { + DataLevel.R0: R0_TEL_GROUP, + DataLevel.R1: R1_TEL_GROUP, + DataLevel.DL1_IMAGES: DL1_TEL_IMAGES_GROUP, + DataLevel.DL1_PARAMETERS: DL1_TEL_PARAMETERS_GROUP, + DataLevel.DL1_MUON: DL1_TEL_MUON_GROUP, + DataLevel.DL2: DL2_SUBARRAY_GROUP, +} class CannotPredict(OSError): @@ -460,6 +470,7 @@ def finish(self): def _overwrite_meta(self): """Overwrite CTAO metadata in the output file.""" + # TODO: Upgrade to new CTAO metatdata standard when available with warnings.catch_warnings(): warnings.simplefilter("ignore", tables.NaturalNameWarning) with tables.open_file(self.output_path, mode="r+") as h5_file: @@ -471,11 +482,20 @@ def _overwrite_meta(self): h5_file.root._v_attrs["CTA ACTIVITY START TIME"] = self.activity_start_time.iso h5_file.root._v_attrs["CTA ACTIVITY STOP TIME"] = Time.now().iso # Update CTA Product metadata - #TODO: Overwrite CTA PRODUCT DATA LEVELS': 'DL1_IMAGES,DL1_PARAMETERS' also + h5_file.root._v_attrs["CTA PRODUCT DATA LEVELS"] = self._get_data_levels(h5_file) h5_file.root._v_attrs["CTA PRODUCT CREATION TIME"] = self.activity_start_time.iso h5_file.root._v_attrs["CTA PRODUCT ID"] = str(uuid.uuid4()) h5_file.flush() + def _get_data_levels(self, h5file): + """Get the data levels present in the HDF5 file.""" + data_levels = { + level.name + for level, group in DATALEVEL_TO_GROUP.items() + if hasattr(h5file.root, group) + } + return ",".join(sorted(data_levels)) + def _ensure_subarray_consistency(self): """ Align subarray metadata and trigger tables with the selected telescopes. 
From 41fac09c250f939da3237af5f3532dcc722dddd9 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Thu, 22 Jan 2026 15:09:22 +0100
Subject: [PATCH 21/27] added unittest for StereoPredictCTLearnModel

---
 ctlearn/conftest.py                       | 76 +++++++++++++++++++-
 ctlearn/tools/predict_model.py            | 33 ++++-----
 ctlearn/tools/tests/test_predict_model.py | 86 +++++++++++++++++++++--
 ctlearn/tools/train_model.py              |  1 -
 pyproject.toml                            |  3 +
 5 files changed, 172 insertions(+), 27 deletions(-)

diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py
index 34d3b56d..1170d94a 100644
--- a/ctlearn/conftest.py
+++ b/ctlearn/conftest.py
@@ -111,7 +111,7 @@ def ctlearn_trained_r1_mono_models(r1_gamma_file, r1_proton_file, tmp_path_facto
     Test training CTLearn model using the R1 gamma and proton files for all reconstruction tasks.
     Each test run gets its own isolated temp directories.
     """
-    tmp_path = tmp_path_factory.mktemp("ctlearn_models")
+    tmp_path = tmp_path_factory.mktemp("ctlearn_mono_models")

     # Temporary directories for signal and background
     signal_dir = tmp_path / "gamma_r1"
@@ -178,7 +178,7 @@ def ctlearn_trained_dl1_mono_models(dl1_gamma_file, dl1_proton_file, tmp_path_fa
     Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks.
     Each test run gets its own isolated temp directories.
     """
-    tmp_path = tmp_path_factory.mktemp("ctlearn_models")
+    tmp_path = tmp_path_factory.mktemp("ctlearn_mono_models")

     # Temporary directories for signal and background
     signal_dir = tmp_path / "gamma_dl1"
@@ -248,4 +248,74 @@ def ctlearn_trained_dl1_mono_models(dl1_gamma_file, dl1_proton_file, tmp_path_fa
         ctlearn_trained_dl1_mono_models[f"{telescope_type}_{reco_task}"] = output_dir / "ctlearn_model.keras"
         # Check that the trained model exists
         assert ctlearn_trained_dl1_mono_models[f"{telescope_type}_{reco_task}"].exists()
-    return ctlearn_trained_dl1_mono_models
\ No newline at end of file
+    return ctlearn_trained_dl1_mono_models
+
+@pytest.fixture(scope="session")
+def ctlearn_trained_dl1_stereo_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory):
+    """
+    Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks.
+    Each test run gets its own isolated temp directories.
+    """
+    tmp_path = tmp_path_factory.mktemp("ctlearn_stereo_models")
+
+    # Temporary directories for signal and background
+    signal_dir = tmp_path / "gamma_dl1"
+    signal_dir.mkdir(parents=True, exist_ok=True)
+
+    background_dir = tmp_path / "proton_dl1"
+    background_dir.mkdir(parents=True, exist_ok=True)
+
+    # Hardcopy DL1 gamma file to the signal directory
+    shutil.copy(dl1_gamma_file, signal_dir)
+    # Hardcopy DL1 proton file to the background directory
+    shutil.copy(dl1_proton_file, background_dir)
+
+    # Configuration to disable quality cuts to increase
+    # training statistics with the small test files.
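+    # An empty quality_criteria list keeps every event.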
+ config = Config( + { + "TableQualityQuery": { + "quality_criteria": [], + }, + }, + ) + + # Restrict to three MSTs for DL1 tests to reduce computational load + telescope_type = "MST" + allowed_tels = [7, 13, 15] + + # Loop over reconstruction tasks and train models for each combination + ctlearn_trained_dl1_stereo_models = {} + for reco_task in ["type", "energy", "skydirection"]: + # Output directory for trained model + output_dir = tmp_path / f"ctlearn_{telescope_type}_{reco_task}" + + # Build command-line arguments + argv = [ + f"--signal={signal_dir}", + "--pattern-signal=*.dl1.h5", + f"--output={output_dir}", + f"--reco={reco_task}", + "--TrainCTLearnModel.n_epochs=1", + "--TrainCTLearnModel.batch_size=2", + "--TrainCTLearnModel.stack_telescope_images=True", + "--DLImageReader.mode=stereo", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}" + ] + + # Include background only for classification task + if reco_task == "type": + argv.extend([ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + ]) + + # Run training + assert run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 + + ctlearn_trained_dl1_stereo_models[f"{telescope_type}_{reco_task}"] = output_dir / "ctlearn_model.keras" + # Check that the trained model exists + assert ctlearn_trained_dl1_stereo_models[f"{telescope_type}_{reco_task}"].exists() + return ctlearn_trained_dl1_stereo_models \ No newline at end of file diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py index 57b75937..c1c63ba9 100644 --- a/ctlearn/tools/predict_model.py +++ b/ctlearn/tools/predict_model.py @@ -953,7 +953,7 @@ def _predict_skydirection(self, example_identifiers): # from the telescope pointing. 
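        # The predicted columns are the (fov_lon, fov_lat) offsets in degrees.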
fov_lon = u.Quantity(predict_data["skydirection"].T[0], unit=u.deg) fov_lat = u.Quantity(predict_data["skydirection"].T[1], unit=u.deg) - # Create prediction table and add the reconstructed energy in TeV + # Create prediction table and add the reconstructed fov_lon and fov_lat skydirection_table = example_identifiers.copy() skydirection_table.add_column(fov_lon, name="fov_lon") skydirection_table.add_column(fov_lat, name="fov_lat") @@ -1979,6 +1979,8 @@ def start(self): DL1_TEL_TRIGGER_TABLE, ) all_identifiers.keep_columns(SUBARRAY_EVENT_KEYS + ["time"]) + # Unique example identifiers by events + all_identifiers = unique(all_identifiers, keys=SUBARRAY_EVENT_KEYS) nonexample_identifiers = setdiff( all_identifiers, example_identifiers, keys=SUBARRAY_EVENT_KEYS ) @@ -2013,6 +2015,9 @@ def start(self): super()._predict_particletype(example_identifiers) ) if self.dl2_subarray: + particletype_table.rename_column( + f"{self.prefixes['all']}_telescopes", f"{self.prefixes['type']}_telescopes" + ) # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( @@ -2028,18 +2033,12 @@ def start(self): particletype_table[f"{self.prefixes['type']}_tel_prediction"].data, dtype=bool, ), - name=f"{self.prefixes['type']}_tel_is_valid", + name=f"{self.prefixes['type']}_is_valid", ) # Rename the columns for the stereo mode particletype_table.rename_column( f"{self.prefixes['type']}_tel_prediction", f"{self.prefixes['type']}_prediction" ) - particletype_table.rename_column( - f"{self.prefixes['type']}_tel_is_valid", f"{self.prefixes['type']}_is_valid" - ) - particletype_table.rename_column( - f"{self.prefixes['all']}_telescopes", f"{self.prefixes['type']}_telescopes" - ) # Deduplicate the subarray particletype table to have only one entry per event particletype_table = super().deduplicate_first_valid( table=particletype_table, @@ -2058,7 +2057,6 @@ def start(self): particletype_table, self.output_path, f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", - overwrite=self.overwrite_tables, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", @@ -2084,6 +2082,9 @@ def start(self): example_identifiers ) if self.dl2_subarray: + energy_table.rename_column( + f"{self.prefixes['all']}_telescopes", f"{self.prefixes['energy']}_telescopes" + ) # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( @@ -2098,18 +2099,12 @@ def start(self): ~np.isnan( energy_table[f"{self.prefixes['energy']}_tel_energy"].data, dtype=bool ), - name=f"{self.prefixes['energy']}_tel_is_valid", + name=f"{self.prefixes['energy']}_is_valid", ) # Rename the columns for the stereo mode energy_table.rename_column( f"{self.prefixes['energy']}_tel_energy", f"{self.prefixes['energy']}_energy" ) - energy_table.rename_column( - f"{self.prefixes['energy']}_tel_is_valid", f"{self.prefixes['energy']}_is_valid" - ) - energy_table.rename_column( - f"{self.prefixes['all']}_telescopes", f"{self.prefixes['energy']}_telescopes" - ) # Deduplicate the subarray energy table to have only one entry per event energy_table = super().deduplicate_first_valid( table=energy_table, @@ -2159,6 +2154,9 @@ def start(self): example_identifiers ) if self.dl2_subarray: + direction_table.rename_column( + f"{self.prefixes['all']}_telescopes", f"{self.prefixes['skydirection']}_telescopes" + ) # Transform the spherical coordinate offsets to sky coordinates direction_table = 
super()._transform_spher_coord_offsets_to_sky( direction_table @@ -2180,9 +2178,6 @@ def start(self): ~np.isnan(direction_table[f"{self.prefixes['skydirection']}_alt"].data, dtype=bool), name=f"{self.prefixes['skydirection']}_is_valid", ) - direction_table.rename_column( - f"{self.prefixes['all']}_telescopes", f"{self.prefixes['skydirection']}_telescopes" - ) # Deduplicate the subarray direction table to have only one entry per event direction_table = super().deduplicate_first_valid( table=direction_table, diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py index 53ce6083..7edeef85 100644 --- a/ctlearn/tools/tests/test_predict_model.py +++ b/ctlearn/tools/tests/test_predict_model.py @@ -4,7 +4,7 @@ from ctapipe.core import run_tool from ctapipe.io import TableLoader -from ctlearn.tools import MonoPredictCTLearnModel +from ctlearn.tools import MonoPredictCTLearnModel, StereoPredictCTLearnModel # Columns that should be present in the output DL2 file REQUIRED_COLUMNS = [ @@ -67,7 +67,7 @@ def test_predict_mono_model_with_r1_waveforms(tmp_path, ctlearn_trained_r1_mono_ "--no-r1-waveforms", "--dl2-telescope", ] - output_file = dl2_dir / f"gamma_{telescope_type}_from_waveforms.dl2.h5" + output_file = dl2_dir / f"gamma_{telescope_type}_mono_from_waveforms.dl2.h5" # Run Prediction tool assert run_tool( MonoPredictCTLearnModel(), @@ -147,7 +147,7 @@ def test_predict_mono_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_mono_m f"--{dl2_tel_flag}", ] for telescope_type, allowed_tels in telescope_types.items(): - output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{telescope_type}_from_images.dl2.h5" + output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{telescope_type}_mono_from_images.dl2.h5" # Run Prediction tool assert run_tool( MonoPredictCTLearnModel(), @@ -188,4 +188,82 @@ def test_predict_mono_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_mono_m if "_tel_" in col: continue assert col in subarray_events.colnames, f"{col} missing in DL2 file {output_file.name}" - assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file + assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + +@pytest.mark.verifies_usecase("DPPS-UC-130-1.2.2") +def test_predict_stereo_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_stereo_models, dl1_gamma_file): + """ + Test training CTLearn stereo model using the DL1 gamma and proton files for all reconstruction tasks + and predicting DL2 from DL1 images. Each test run gets its own isolated temp directories. 
+ """ + + model_dir = tmp_path / "trained_models" + model_dir.mkdir(parents=True, exist_ok=True) + + dl2_dir = tmp_path / "dl2_output" + dl2_dir.mkdir(parents=True, exist_ok=True) + # Define telescope types and their available telescopes + telescope_type = "MST" + allowed_tels = [7, 13, 15] + + # Hardcopy the trained models to the model directory + for reco_task in ["type", "energy", "skydirection"]: + key = f"{telescope_type}_{reco_task}" + shutil.copy(ctlearn_trained_dl1_stereo_models[key], model_dir / f"ctlearn_stereo_model_{key}.keras") + model_file = model_dir / f"ctlearn_stereo_model_{key}.keras" + assert model_file.exists(), f"Trained stereo model file not found for {key}" + # Build command-line arguments + argv = [ + f"--input_url={dl1_gamma_file}", + "--PredictCTLearnModel.batch_size=2", + "--PredictCTLearnModel.stack_telescope_images=True", + "--DLImageReader.mode=stereo", + "--DLImageReader.focal_length_choice=EQUIVALENT", + f"--DLImageReader.allowed_tels={allowed_tels}", + "--no-dl1-images", + "--no-true-images", + ] + output_file = dl2_dir / f"gamma_{telescope_type}_stereo_from_images.dl2.h5" + # Run Prediction tool + assert run_tool( + StereoPredictCTLearnModel(), + argv = argv + [ + f"--output={output_file}", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_skydirection_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_skydirection.keras", + ], + cwd=tmp_path + ) == 0 + + # Check that the output DL2 file was created + assert output_file.exists(), "Output DL2 file not created" + # Check that the created DL2 file can be read with the TableLoader + with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: + # Check telescope-wise data + tel_events = loader.read_telescope_events_by_id(telescopes=allowed_tels, dl1_parameters=True, dl2=True) + for tel_id in allowed_tels: + assert len(tel_events[tel_id]) > 0 + for col in REQUIRED_COLUMNS + [ + "tel_id", + "hillas_intensity", + "leakage_pixels_width_2", + "telescope_pointing_azimuth", + "telescope_pointing_altitude", + ]: + if "_tel_" in col: + continue + if "CTLearnCameraReconstructor" in col: + col = col.replace("CTLearnCameraReconstructor", "CTLearnSkyReconstructor") + assert col in tel_events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" + assert tel_events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + # Check subarray-wise data + subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) + assert len(subarray_events) > 0 + for col in REQUIRED_COLUMNS: + if "_tel_" in col: + continue + if "CTLearnCameraReconstructor" in col: + col = col.replace("CTLearnCameraReconstructor", "CTLearnSkyReconstructor") + assert col in subarray_events.colnames, f"{col} missing in DL2 file {output_file.name}" + assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py index a55b524b..5b25ab07 100644 --- a/ctlearn/tools/train_model.py +++ b/ctlearn/tools/train_model.py @@ -6,7 +6,6 @@ import keras import pandas as pd import numpy as np -import shutil import tensorflow as tf from ctapipe.core import Tool diff --git a/pyproject.toml b/pyproject.toml index c361c71f..107325d7 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -70,6 +70,9 @@ version_file = "ctlearn/_version.py" [tool.pytest.ini_options] testpaths = ["ctlearn"] +markers = [ + "verifies_usecase(usecase_id): mark test as verifying a specific use case", +] norecursedirs = [ ".git", From 0f262529bac3c5c0c3ed0d5584bb7bc9879d0b4d Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Thu, 22 Jan 2026 16:50:18 +0100 Subject: [PATCH 22/27] format via black --- ctlearn/conftest.py | 96 ++++++---- ctlearn/tools/predict_model.py | 217 +++++++++++++--------- ctlearn/tools/tests/test_predict_model.py | 207 ++++++++++++++------- ctlearn/tools/tests/test_train_model.py | 31 ++-- ctlearn/tools/train_model.py | 134 ++++++++----- 5 files changed, 437 insertions(+), 248 deletions(-) diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py index 1170d94a..20ce564e 100644 --- a/ctlearn/conftest.py +++ b/ctlearn/conftest.py @@ -10,26 +10,31 @@ from ctapipe.utils import get_dataset_path from ctlearn.tools import TrainCTLearnModel + @pytest.fixture(scope="session") def gamma_simtel_path(): return get_dataset_path("gamma_test_large.simtel.gz") + @pytest.fixture(scope="session") def proton_simtel_path(): return get_dataset_path( "proton_20deg_0deg_run4___cta-prod5-paranal_desert-2147m-Paranal-dark-100evts.simtel.zst" ) + @pytest.fixture(scope="session") def dl1_tmp_path(tmp_path_factory): """Temporary directory for global dl1 test data""" return tmp_path_factory.mktemp("dl1_") + @pytest.fixture(scope="session") def r1_tmp_path(tmp_path_factory): """Temporary directory for global r1 test data""" return tmp_path_factory.mktemp("r1_") + @pytest.fixture(scope="session") def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): """ @@ -47,6 +52,7 @@ def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output + @pytest.fixture(scope="session") def dl1_proton_file(dl1_tmp_path, proton_simtel_path): """ @@ -64,6 +70,7 @@ def dl1_proton_file(dl1_tmp_path, proton_simtel_path): assert run_tool(ProcessorTool(), argv=argv, cwd=dl1_tmp_path) == 0 return output + @pytest.fixture(scope="session") def r1_gamma_file(r1_tmp_path, gamma_simtel_path): """ @@ -73,7 +80,7 @@ def r1_gamma_file(r1_tmp_path, gamma_simtel_path): output = r1_tmp_path / "gamma.r1.h5" - allowed_tels = [1,2] + allowed_tels = [1, 2] argv = [ f"--input={gamma_simtel_path}", f"--output={output}", @@ -84,6 +91,7 @@ def r1_gamma_file(r1_tmp_path, gamma_simtel_path): assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 return output + @pytest.fixture(scope="session") def r1_proton_file(r1_tmp_path, proton_simtel_path): """ @@ -92,7 +100,7 @@ def r1_proton_file(r1_tmp_path, proton_simtel_path): from ctapipe.tools.process import ProcessorTool # Restrict to two LSTs for R1 tests to reduce computational load - allowed_tels = [1,2] + allowed_tels = [1, 2] output = r1_tmp_path / "proton.r1.h5" argv = [ f"--input={proton_simtel_path}", @@ -100,11 +108,11 @@ def r1_proton_file(r1_tmp_path, proton_simtel_path): f"--DataWriter.write_r1_waveforms=True", "--SimTelEventSource.focal_length_choice=EQUIVALENT", f"--SimTelEventSource.allowed_tels={allowed_tels}", - ] assert run_tool(ProcessorTool(), argv=argv, cwd=r1_tmp_path) == 0 return output + @pytest.fixture(scope="session") def ctlearn_trained_r1_mono_models(r1_gamma_file, r1_proton_file, tmp_path_factory): """ @@ -142,7 +150,7 @@ def ctlearn_trained_r1_mono_models(r1_gamma_file, r1_proton_file, tmp_path_facto for reco_task in ["type", "energy", "cameradirection"]: # 
Output directory for trained model output_dir = tmp_path / f"ctlearn_{telescope_type}_{reco_task}" - + # Build command-line arguments argv = [ f"--signal={signal_dir}", @@ -158,20 +166,25 @@ def ctlearn_trained_r1_mono_models(r1_gamma_file, r1_proton_file, tmp_path_facto # Include background only for classification task if reco_task == "type": - argv.extend([ - f"--background={background_dir}", - "--pattern-background=*.r1.h5", - "--DLWaveformReader.enforce_subarray_equality=False", - ]) + argv.extend( + [ + f"--background={background_dir}", + "--pattern-background=*.r1.h5", + "--DLWaveformReader.enforce_subarray_equality=False", + ] + ) # Run training assert run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 - ctlearn_trained_r1_mono_models[f"{telescope_type}_{reco_task}"] = output_dir / "ctlearn_model.keras" + ctlearn_trained_r1_mono_models[f"{telescope_type}_{reco_task}"] = ( + output_dir / "ctlearn_model.keras" + ) # Check that the trained model exists assert ctlearn_trained_r1_mono_models[f"{telescope_type}_{reco_task}"].exists() return ctlearn_trained_r1_mono_models + @pytest.fixture(scope="session") def ctlearn_trained_dl1_mono_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory): """ @@ -206,12 +219,12 @@ def ctlearn_trained_dl1_mono_models(dl1_gamma_file, dl1_proton_file, tmp_path_fa telescope_types = { "LST": [1, 2], "MST": [7, 13, 15, 16, 17, 19], - #"SST": [30, 37, 43, 44, 53], + # "SST": [30, 37, 43, 44, 53], } image_mapper_types = { "LST": "BilinearMapper", "MST": "OversamplingMapper", - #"SST": "SquareMapper", + # "SST": "SquareMapper", } # Loop over telescope types and reconstruction tasks # and train models for each combination @@ -220,7 +233,7 @@ def ctlearn_trained_dl1_mono_models(dl1_gamma_file, dl1_proton_file, tmp_path_fa for reco_task in ["type", "energy", "cameradirection"]: # Output directory for trained model output_dir = tmp_path / f"ctlearn_{telescope_type}_{reco_task}" - + # Build command-line arguments argv = [ f"--signal={signal_dir}", @@ -235,23 +248,34 @@ def ctlearn_trained_dl1_mono_models(dl1_gamma_file, dl1_proton_file, tmp_path_fa # Include background only for classification task if reco_task == "type": - argv.extend([ - f"--background={background_dir}", - "--pattern-background=*.dl1.h5", - "--DLImageReader.enforce_subarray_equality=False", - f"--DLImageReader.image_mapper_type={image_mapper_types[telescope_type]}", - ]) + argv.extend( + [ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + f"--DLImageReader.image_mapper_type={image_mapper_types[telescope_type]}", + ] + ) # Run training - assert run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 + assert ( + run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0 + ) - ctlearn_trained_dl1_mono_models[f"{telescope_type}_{reco_task}"] = output_dir / "ctlearn_model.keras" + ctlearn_trained_dl1_mono_models[f"{telescope_type}_{reco_task}"] = ( + output_dir / "ctlearn_model.keras" + ) # Check that the trained model exists - assert ctlearn_trained_dl1_mono_models[f"{telescope_type}_{reco_task}"].exists() + assert ctlearn_trained_dl1_mono_models[ + f"{telescope_type}_{reco_task}" + ].exists() return ctlearn_trained_dl1_mono_models + @pytest.fixture(scope="session") -def ctlearn_trained_dl1_stereo_models(dl1_gamma_file, dl1_proton_file, tmp_path_factory): +def ctlearn_trained_dl1_stereo_models( + dl1_gamma_file, dl1_proton_file, tmp_path_factory +): """ Test training CTLearn 
model using the DL1 gamma and proton files for all reconstruction tasks.
     Each test run gets its own isolated temp directories.
@@ -289,7 +313,7 @@ def ctlearn_trained_dl1_stereo_models(dl1_gamma_file, dl1_proton_file, tmp_path_
     for reco_task in ["type", "energy", "skydirection"]:
         # Output directory for trained model
         output_dir = tmp_path / f"ctlearn_{telescope_type}_{reco_task}"
-
+
         # Build command-line arguments
         argv = [
             f"--signal={signal_dir}",
@@ -301,21 +325,27 @@ def ctlearn_trained_dl1_stereo_models(dl1_gamma_file, dl1_proton_file, tmp_path_
             "--TrainCTLearnModel.stack_telescope_images=True",
             "--DLImageReader.mode=stereo",
             "--DLImageReader.focal_length_choice=EQUIVALENT",
-            f"--DLImageReader.allowed_tels={allowed_tels}"
+            f"--DLImageReader.allowed_tels={allowed_tels}",
         ]
 
         # Include background only for classification task
         if reco_task == "type":
-            argv.extend([
-                f"--background={background_dir}",
-                "--pattern-background=*.dl1.h5",
-                "--DLImageReader.enforce_subarray_equality=False",
-            ])
+            argv.extend(
+                [
+                    f"--background={background_dir}",
+                    "--pattern-background=*.dl1.h5",
+                    "--DLImageReader.enforce_subarray_equality=False",
+                ]
+            )
 
         # Run training
         assert run_tool(TrainCTLearnModel(config=config), argv=argv, cwd=tmp_path) == 0
 
-        ctlearn_trained_dl1_stereo_models[f"{telescope_type}_{reco_task}"] = output_dir / "ctlearn_model.keras"
+        ctlearn_trained_dl1_stereo_models[f"{telescope_type}_{reco_task}"] = (
+            output_dir / "ctlearn_model.keras"
+        )
         # Check that the trained model exists
-        assert ctlearn_trained_dl1_stereo_models[f"{telescope_type}_{reco_task}"].exists()
-    return ctlearn_trained_dl1_stereo_models
+        assert ctlearn_trained_dl1_stereo_models[
+            f"{telescope_type}_{reco_task}"
+        ].exists()
+    return ctlearn_trained_dl1_stereo_models
diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py
index c1c63ba9..0fb238f9 100644
--- a/ctlearn/tools/predict_model.py
+++ b/ctlearn/tools/predict_model.py
@@ -125,6 +125,7 @@
 class CannotPredict(OSError):
     """Raised when trying to predict an incompatible file"""
 
+
 class PredictCTLearnModel(Tool):
     """
     Base tool to predict the gammaness, energy and arrival direction from R1/DL1 data using CTLearn models.
@@ -270,7 +271,7 @@ class PredictCTLearnModel(Tool):
         help=(
             "Name of the reconstruction algorithm used "
             "to generate the dl2 data for each task."
- ) + ), ).tag(config=True) load_type_model_from = Path( @@ -358,7 +359,7 @@ class PredictCTLearnModel(Tool): flags = { **flag( - "overwrite" , + "overwrite", "HDF5Merger.overwrite", "Overwrite the output file if it exists", "Do not overwrite the output file if it exists", @@ -378,8 +379,8 @@ class PredictCTLearnModel(Tool): **flag( "dl2-subarray", "PredictCTLearnModel.dl2_subarray", - "Include dl2 telescope-event-wise data in the output file", - "Exclude dl2 telescope-event-wise data in the output file", + "Include dl2 subarray-event-wise data in the output file", + "Exclude dl2 subarray-event-wise data in the output file", ), **flag( "r0-waveforms", @@ -425,14 +426,13 @@ def setup(self): self.activity_start_time = Time.now() self.log.info("ctlearn version %s", ctlearn_version) # Validate the prefixes trait dictionary - validate_trait_dict(self.prefixes, ["type", "energy", "cameradirection", "skydirection", "all"]) + validate_trait_dict( + self.prefixes, ["type", "energy", "cameradirection", "skydirection", "all"] + ) # Copy selected tables from the input file to the output file self.log.info("Copying to output destination.") with HDF5Merger( - self.output_path, - dl2_subarray=False, - dl2_telescope=False, - parent=self + self.output_path, dl2_subarray=False, dl2_telescope=False, parent=self ) as merger: merger(self.input_url) # Create a MirroredStrategy. @@ -467,7 +467,7 @@ def finish(self): # Overwrite CTAO reference metadata to the output file self._overwrite_meta() self.log.info("Tool is shutting down") - + def _overwrite_meta(self): """Overwrite CTAO metadata in the output file.""" # TODO: Upgrade to new CTAO metatdata standard when available @@ -479,11 +479,17 @@ def _overwrite_meta(self): h5_file.root._v_attrs["CTA ACTIVITY NAME"] = self.name h5_file.root._v_attrs["CTA ACTIVITY SOFTWARE NAME"] = "ctlearn" h5_file.root._v_attrs["CTA ACTIVITY SOFTWARE VERSION"] = ctlearn_version - h5_file.root._v_attrs["CTA ACTIVITY START TIME"] = self.activity_start_time.iso + h5_file.root._v_attrs["CTA ACTIVITY START TIME"] = ( + self.activity_start_time.iso + ) h5_file.root._v_attrs["CTA ACTIVITY STOP TIME"] = Time.now().iso # Update CTA Product metadata - h5_file.root._v_attrs["CTA PRODUCT DATA LEVELS"] = self._get_data_levels(h5_file) - h5_file.root._v_attrs["CTA PRODUCT CREATION TIME"] = self.activity_start_time.iso + h5_file.root._v_attrs["CTA PRODUCT DATA LEVELS"] = ( + self._get_data_levels(h5_file) + ) + h5_file.root._v_attrs["CTA PRODUCT CREATION TIME"] = ( + self.activity_start_time.iso + ) h5_file.root._v_attrs["CTA PRODUCT ID"] = str(uuid.uuid4()) h5_file.flush() @@ -542,13 +548,9 @@ def _ensure_subarray_consistency(self): tel_trigger_groups = tel_trigger_table.group_by(SUBARRAY_EVENT_KEYS) tel_with_trigger = [] for tel_trigger in tel_trigger_groups.groups: - tel_with_trigger_mask = np.zeros( - len(self.dl1dh_reader.tel_ids), dtype=bool - ) + tel_with_trigger_mask = np.zeros(len(self.dl1dh_reader.tel_ids), dtype=bool) tel_with_trigger_mask[ - self.dl1dh_reader.subarray.tel_ids_to_indices( - tel_trigger["tel_id"] - ) + self.dl1dh_reader.subarray.tel_ids_to_indices(tel_trigger["tel_id"]) ] = True tel_with_trigger.append(tel_with_trigger_mask) @@ -572,7 +574,7 @@ def _ensure_subarray_consistency(self): sim_shower_table, subarray_trigger_table, keys=SUBARRAY_EVENT_KEYS, - join_type="right" + join_type="right", ) sim_shower_table.sort(SUBARRAY_EVENT_KEYS) write_table( @@ -609,7 +611,7 @@ def prune_group(group, valid_ids): if group is not None: prune_group(group, tel_ids) - #Camera 
configuration tables + # Camera configuration tables layout_node = getattr(h5_file.root, CONFIG_INSTRUMENT_SUBARRAY_LAYOUT, None) camera_group = getattr(h5_file.root, CONFIG_INSTRUMENT_TEL_CAMERA, None) if not (layout_node and camera_group): @@ -622,7 +624,7 @@ def prune_group(group, valid_ids): ) camera_indices = set(layout_table.col("camera_index")) prune_group(camera_group, camera_indices) - + def _create_nan_table(self, nonexample_identifiers, columns, shapes, reco_task): """ Create a table with NaNs for missing predictions. @@ -664,8 +666,8 @@ def _create_nan_table(self, nonexample_identifiers, columns, shapes, reco_task): def deduplicate_first_valid( self, table: Table, - keys=('obs_id', 'event_id'), - valid_col='CTLearn_is_valid', + keys=("obs_id", "event_id"), + valid_col="CTLearn_is_valid", ): """ Return a deduplicated Astropy Table. @@ -676,12 +678,9 @@ def deduplicate_first_valid( t = table.copy() - t.sort( - list(keys) + [valid_col], - reverse=[False] * len(keys) + [True] - ) + t.sort(list(keys) + [valid_col], reverse=[False] * len(keys) + [True]) - return unique(t, keys=list(keys), keep='first') + return unique(t, keys=list(keys), keep="first") def _predict_with_model(self, model_path): """ @@ -880,7 +879,9 @@ def _predict_energy(self, example_identifiers): ) # Create prediction table and add the reconstructed energy in TeV energy_table = example_identifiers.copy() - energy_table.add_column(reco_energy, name=f"{self.prefixes['energy']}_tel_energy") + energy_table.add_column( + reco_energy, name=f"{self.prefixes['energy']}_tel_energy" + ) return energy_table, feature_vectors def _predict_cameradirection(self, example_identifiers): @@ -1017,8 +1018,14 @@ def _transform_cam_coord_offsets_to_sky(self, table) -> Table: # Transform the true Alt/Az coordinates to camera coordinates reco_direction = cam_coord_offset.transform_to(altaz) # Add the reconstructed direction (az, alt) to the prediction table - table.add_column(reco_direction.az.to(u.deg), name=f"{self.prefixes['cameradirection']}_tel_az") - table.add_column(reco_direction.alt.to(u.deg), name=f"{self.prefixes['cameradirection']}_tel_alt") + table.add_column( + reco_direction.az.to(u.deg), + name=f"{self.prefixes['cameradirection']}_tel_az", + ) + table.add_column( + reco_direction.alt.to(u.deg), + name=f"{self.prefixes['cameradirection']}_tel_alt", + ) # Remove unnecessary columns from the table that do not the ctapipe DL2 data format table.remove_columns( [ @@ -1081,8 +1088,12 @@ def _transform_spher_coord_offsets_to_sky(self, table) -> Table: # Transform the reco direction from nominal frame to the AltAz frame sky_coord = reco_direction.transform_to(altaz) # Add the reconstructed direction (az, alt) to the prediction table - table.add_column(sky_coord.az.to(u.deg), name=f"{self.prefixes['skydirection']}_az") - table.add_column(sky_coord.alt.to(u.deg), name=f"{self.prefixes['skydirection']}_alt") + table.add_column( + sky_coord.az.to(u.deg), name=f"{self.prefixes['skydirection']}_az" + ) + table.add_column( + sky_coord.alt.to(u.deg), name=f"{self.prefixes['skydirection']}_alt" + ) # Remove unnecessary columns from the table that do not the ctapipe DL2 data format table.remove_columns( [ @@ -1224,7 +1235,9 @@ def _create_feature_vectors_table( name=f"{self.prefixes['all']}_tel_particletype_feature_vectors", ) if nonexample_identifiers is not None: - columns_list.append(f"{self.prefixes['all']}_tel_particletype_feature_vectors") + columns_list.append( + f"{self.prefixes['all']}_tel_particletype_feature_vectors" + ) 
shapes_list.append( ( len(nonexample_identifiers), @@ -1234,10 +1247,13 @@ def _create_feature_vectors_table( if energy_feature_vectors is not None: is_valid_col = ~np.isnan(np.min(energy_feature_vectors, axis=1), dtype=bool) feature_vector_table.add_column( - energy_feature_vectors, name=f"{self.prefixes['all']}_tel_energy_feature_vectors" + energy_feature_vectors, + name=f"{self.prefixes['all']}_tel_energy_feature_vectors", ) if nonexample_identifiers is not None: - columns_list.append(f"{self.prefixes['all']}_tel_energy_feature_vectors") + columns_list.append( + f"{self.prefixes['all']}_tel_energy_feature_vectors" + ) shapes_list.append( ( len(nonexample_identifiers), @@ -1256,7 +1272,9 @@ def _create_feature_vectors_table( name=f"{self.prefixes['all']}_tel_geometry_feature_vectors", ) if nonexample_identifiers is not None: - columns_list.append(f"{self.prefixes['all']}_tel_geometry_feature_vectors") + columns_list.append( + f"{self.prefixes['all']}_tel_geometry_feature_vectors" + ) shapes_list.append( ( len(nonexample_identifiers), @@ -1401,7 +1419,7 @@ def start(self): add_defaults_and_meta( particletype_table, ParticleClassificationContainer, - prefix=self.prefixes['type'], + prefix=self.prefixes["type"], add_tel_prefix=True, ) if self.dl2_telescope: @@ -1423,7 +1441,7 @@ def start(self): self.output_path, f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefixes['type']}/tel_{tel_id:03d}", ) - + if self.dl2_subarray: self.log.info("Processing and storing the subarray type prediction...") # If only one telescope is used, copy the particletype table @@ -1455,7 +1473,7 @@ def start(self): else: self.type_stereo_combiner = StereoCombiner.from_name( self.stereo_combiner_cls, - prefix=self.prefixes['type'], + prefix=self.prefixes["type"], property=ReconstructionProperty.PARTICLE_TYPE, parent=self, ) @@ -1466,7 +1484,9 @@ def start(self): # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - particletype_subarray_table[f"{self.prefixes['type']}_telescopes"].dtype + particletype_subarray_table[ + f"{self.prefixes['type']}_telescopes" + ].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination @@ -1479,7 +1499,9 @@ def start(self): ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - particletype_subarray_table[f"{self.prefixes['type']}_telescopes"] + particletype_subarray_table[ + f"{self.prefixes['type']}_telescopes" + ] ): if not tel_id_mask: continue @@ -1490,15 +1512,15 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix length - particletype_subarray_table[f"{self.prefixes['type']}_telescopes"] = ( - reco_telescopes - ) + particletype_subarray_table[ + f"{self.prefixes['type']}_telescopes" + ] = reco_telescopes # Deduplicate the subarray particletype table to have only one entry per event particletype_subarray_table = super().deduplicate_first_valid( table=particletype_subarray_table, keys=SUBARRAY_EVENT_KEYS, valid_col=f"{self.prefixes['type']}_is_valid", - ) + ) # Sort the subarray particletype table particletype_subarray_table.sort(SUBARRAY_EVENT_KEYS) # Save the prediction to the output file @@ -1542,7 +1564,8 @@ def start(self): # Add is_valid column to the energy table energy_table.add_column( ~np.isnan( - energy_table[f"{self.prefixes['energy']}_tel_energy"].data, dtype=bool + energy_table[f"{self.prefixes['energy']}_tel_energy"].data, + dtype=bool, ), 
name=f"{self.prefixes['energy']}_tel_is_valid", ) @@ -1550,7 +1573,7 @@ def start(self): add_defaults_and_meta( energy_table, ReconstructedEnergyContainer, - prefix=self.prefixes['energy'], + prefix=self.prefixes["energy"], add_tel_prefix=True, ) if self.dl2_telescope: @@ -1593,14 +1616,16 @@ def start(self): energy_subarray_table.add_column( [ [val] - for val in energy_subarray_table[f"{self.prefixes['energy']}_is_valid"] + for val in energy_subarray_table[ + f"{self.prefixes['energy']}_is_valid" + ] ], name=f"{self.prefixes['energy']}_telescopes", ) else: self.energy_stereo_combiner = StereoCombiner.from_name( self.stereo_combiner_cls, - prefix=self.prefixes['energy'], + prefix=self.prefixes["energy"], property=ReconstructionProperty.ENERGY, parent=self, ) @@ -1611,7 +1636,9 @@ def start(self): # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - energy_subarray_table[f"{self.prefixes['energy']}_telescopes"].dtype + energy_subarray_table[ + f"{self.prefixes['energy']}_telescopes" + ].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination @@ -1624,7 +1651,9 @@ def start(self): ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - energy_subarray_table[f"{self.prefixes['energy']}_telescopes"] + energy_subarray_table[ + f"{self.prefixes['energy']}_telescopes" + ] ): if not tel_id_mask: continue @@ -1635,15 +1664,15 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix length - energy_subarray_table[f"{self.prefixes['energy']}_telescopes"] = ( - reco_telescopes - ) + energy_subarray_table[ + f"{self.prefixes['energy']}_telescopes" + ] = reco_telescopes # Deduplicate the subarray energy table to have only one entry per event energy_subarray_table = super().deduplicate_first_valid( table=energy_subarray_table, keys=SUBARRAY_EVENT_KEYS, valid_col=f"{self.prefixes['energy']}_is_valid", - ) + ) # Sort the subarray energy table energy_subarray_table.sort(SUBARRAY_EVENT_KEYS) # Save the prediction to the output file @@ -1693,13 +1722,14 @@ def start(self): ) # Produce output table with NaNs for missing predictions nan_telescope_mask = nonexample_identifiers["tel_id"] == tel_id - nonexample_identifiers_tel = nonexample_identifiers[ - nan_telescope_mask - ] + nonexample_identifiers_tel = nonexample_identifiers[nan_telescope_mask] if len(nonexample_identifiers_tel) > 0: nan_table = super()._create_nan_table( nonexample_identifiers_tel, - columns=[f"{self.prefixes['cameradirection']}_tel_alt", f"{self.prefixes['cameradirection']}_tel_az"], + columns=[ + f"{self.prefixes['cameradirection']}_tel_alt", + f"{self.prefixes['cameradirection']}_tel_az", + ], shapes=[ (len(nonexample_identifiers_tel),), (len(nonexample_identifiers_tel),), @@ -1711,7 +1741,9 @@ def start(self): # Add is_valid column to the direction table direction_tel_table.add_column( ~np.isnan( - direction_tel_table[f"{self.prefixes['cameradirection']}_tel_alt"].data, + direction_tel_table[ + f"{self.prefixes['cameradirection']}_tel_alt" + ].data, dtype=bool, ), name=f"{self.prefixes['cameradirection']}_tel_is_valid", @@ -1720,7 +1752,7 @@ def start(self): add_defaults_and_meta( direction_tel_table, ReconstructedGeometryContainer, - prefix=self.prefixes['cameradirection'], + prefix=self.prefixes["cameradirection"], add_tel_prefix=True, ) direction_tel_tables.append(direction_tel_table) @@ -1771,7 +1803,7 @@ def 
start(self): else: self.geometry_stereo_combiner = StereoCombiner.from_name( self.stereo_combiner_cls, - prefix=self.prefixes['cameradirection'], + prefix=self.prefixes["cameradirection"], property=ReconstructionProperty.GEOMETRY, parent=self, ) @@ -1784,7 +1816,9 @@ def start(self): # TODO: Remove temporary fix once the stereo combiner returns correct table # Check if the table has to be converted to a boolean mask if ( - direction_subarray_table[f"{self.prefixes['cameradirection']}_telescopes"].dtype + direction_subarray_table[ + f"{self.prefixes['cameradirection']}_telescopes" + ].dtype != np.bool_ ): # Create boolean mask for telescopes that participate in the stereo reconstruction combination @@ -1797,7 +1831,9 @@ def start(self): ) # Loop over the table and set the boolean mask for the telescopes for index, tel_id_mask in enumerate( - direction_subarray_table[f"{self.prefixes['cameradirection']}_telescopes"] + direction_subarray_table[ + f"{self.prefixes['cameradirection']}_telescopes" + ] ): if not tel_id_mask: continue @@ -1808,15 +1844,15 @@ def start(self): ) ] = True # Overwrite the column with the boolean mask with fix length - direction_subarray_table[f"{self.prefixes['cameradirection']}_telescopes"] = ( - reco_telescopes - ) + direction_subarray_table[ + f"{self.prefixes['cameradirection']}_telescopes" + ] = reco_telescopes # Deduplicate the subarray direction table to have only one entry per event direction_subarray_table = super().deduplicate_first_valid( table=direction_subarray_table, keys=SUBARRAY_EVENT_KEYS, valid_col=f"{self.prefixes['cameradirection']}_is_valid", - ) + ) # Sort the subarray geometry table direction_subarray_table.sort(SUBARRAY_EVENT_KEYS) # Save the prediction to the output file @@ -2016,7 +2052,8 @@ def start(self): ) if self.dl2_subarray: particletype_table.rename_column( - f"{self.prefixes['all']}_telescopes", f"{self.prefixes['type']}_telescopes" + f"{self.prefixes['all']}_telescopes", + f"{self.prefixes['type']}_telescopes", ) # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: @@ -2030,14 +2067,17 @@ def start(self): # Add is_valid column to the particletype table particletype_table.add_column( ~np.isnan( - particletype_table[f"{self.prefixes['type']}_tel_prediction"].data, + particletype_table[ + f"{self.prefixes['type']}_tel_prediction" + ].data, dtype=bool, ), name=f"{self.prefixes['type']}_is_valid", ) # Rename the columns for the stereo mode particletype_table.rename_column( - f"{self.prefixes['type']}_tel_prediction", f"{self.prefixes['type']}_prediction" + f"{self.prefixes['type']}_tel_prediction", + f"{self.prefixes['type']}_prediction", ) # Deduplicate the subarray particletype table to have only one entry per event particletype_table = super().deduplicate_first_valid( @@ -2050,7 +2090,7 @@ def start(self): add_defaults_and_meta( particletype_table, ParticleClassificationContainer, - prefix=self.prefixes['type'], + prefix=self.prefixes["type"], ) # Save the prediction to the output file write_table( @@ -2083,7 +2123,8 @@ def start(self): ) if self.dl2_subarray: energy_table.rename_column( - f"{self.prefixes['all']}_telescopes", f"{self.prefixes['energy']}_telescopes" + f"{self.prefixes['all']}_telescopes", + f"{self.prefixes['energy']}_telescopes", ) # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: @@ -2097,13 +2138,15 @@ def start(self): # Add is_valid column to the energy table energy_table.add_column( ~np.isnan( - 
energy_table[f"{self.prefixes['energy']}_tel_energy"].data, dtype=bool + energy_table[f"{self.prefixes['energy']}_tel_energy"].data, + dtype=bool, ), name=f"{self.prefixes['energy']}_is_valid", ) # Rename the columns for the stereo mode energy_table.rename_column( - f"{self.prefixes['energy']}_tel_energy", f"{self.prefixes['energy']}_energy" + f"{self.prefixes['energy']}_tel_energy", + f"{self.prefixes['energy']}_energy", ) # Deduplicate the subarray energy table to have only one entry per event energy_table = super().deduplicate_first_valid( @@ -2116,7 +2159,7 @@ def start(self): add_defaults_and_meta( energy_table, ReconstructedEnergyContainer, - prefix=self.prefixes['energy'], + prefix=self.prefixes["energy"], ) # Save the prediction to the output file write_table( @@ -2155,7 +2198,8 @@ def start(self): ) if self.dl2_subarray: direction_table.rename_column( - f"{self.prefixes['all']}_telescopes", f"{self.prefixes['skydirection']}_telescopes" + f"{self.prefixes['all']}_telescopes", + f"{self.prefixes['skydirection']}_telescopes", ) # Transform the spherical coordinate offsets to sky coordinates direction_table = super()._transform_spher_coord_offsets_to_sky( @@ -2165,7 +2209,10 @@ def start(self): if len(nonexample_identifiers) > 0: nan_table = super()._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefixes['skydirection']}_alt", f"{self.prefixes['skydirection']}_az"], + columns=[ + f"{self.prefixes['skydirection']}_alt", + f"{self.prefixes['skydirection']}_az", + ], shapes=[ (len(nonexample_identifiers),), (len(nonexample_identifiers),), @@ -2175,7 +2222,10 @@ def start(self): direction_table = vstack([direction_table, nan_table]) # Add is_valid column to the direction table direction_table.add_column( - ~np.isnan(direction_table[f"{self.prefixes['skydirection']}_alt"].data, dtype=bool), + ~np.isnan( + direction_table[f"{self.prefixes['skydirection']}_alt"].data, + dtype=bool, + ), name=f"{self.prefixes['skydirection']}_is_valid", ) # Deduplicate the subarray direction table to have only one entry per event @@ -2189,7 +2239,7 @@ def start(self): add_defaults_and_meta( direction_table, ReconstructedGeometryContainer, - prefix=self.prefixes['skydirection'], + prefix=self.prefixes["skydirection"], ) # Save the prediction to the output file write_table( @@ -2241,7 +2291,8 @@ def start(self): f"{self.prefixes['all']}_geometry_feature_vectors", ) feature_vector_table.rename_column( - f"{self.prefixes['all']}_tel_is_valid", f"{self.prefixes['all']}_is_valid" + f"{self.prefixes['all']}_tel_is_valid", + f"{self.prefixes['all']}_is_valid", ) feature_vector_table.sort(SUBARRAY_EVENT_KEYS) # Save the prediction to the output file diff --git a/ctlearn/tools/tests/test_predict_model.py b/ctlearn/tools/tests/test_predict_model.py index 7edeef85..5f17fc34 100644 --- a/ctlearn/tools/tests/test_predict_model.py +++ b/ctlearn/tools/tests/test_predict_model.py @@ -31,12 +31,14 @@ "CTLearnCameraReconstructor_telescopes", "CTLearnClassifier_telescopes", "CTLearnRegressor_telescopes", - "tels_with_trigger" + "tels_with_trigger", ] @pytest.mark.verifies_usecase("DPPS-UC-130-1.2") -def test_predict_mono_model_with_r1_waveforms(tmp_path, ctlearn_trained_r1_mono_models, r1_gamma_file): +def test_predict_mono_model_with_r1_waveforms( + tmp_path, ctlearn_trained_r1_mono_models, r1_gamma_file +): """ Test training CTLearn mono model using the R1 gamma and proton files for all reconstruction tasks and predicting DL2 from R1 waveforms. Each test run gets its own isolated temp directories. 
@@ -49,12 +51,15 @@ def test_predict_mono_model_with_r1_waveforms(tmp_path, ctlearn_trained_r1_mono_ dl2_dir.mkdir(parents=True, exist_ok=True) # Define telescope types and their available telescopes telescope_type = "LST" - available_tels = [1,2] - + available_tels = [1, 2] + # Hardcopy the trained models to the model directory for reco_task in ["type", "energy", "cameradirection"]: key = f"{telescope_type}_{reco_task}" - shutil.copy(ctlearn_trained_r1_mono_models[key], model_dir / f"ctlearn_mono_model_{key}.keras") + shutil.copy( + ctlearn_trained_r1_mono_models[key], + model_dir / f"ctlearn_mono_model_{key}.keras", + ) model_file = model_dir / f"ctlearn_mono_model_{key}.keras" assert model_file.exists(), f"Trained mono model file not found for {key}" # Build command-line arguments @@ -69,23 +74,31 @@ def test_predict_mono_model_with_r1_waveforms(tmp_path, ctlearn_trained_r1_mono_ ] output_file = dl2_dir / f"gamma_{telescope_type}_mono_from_waveforms.dl2.h5" # Run Prediction tool - assert run_tool( - MonoPredictCTLearnModel(), - argv = argv + [ - f"--output={output_file}", - f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras", - f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras", - f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras", - ], - cwd=tmp_path - ) == 0 + assert ( + run_tool( + MonoPredictCTLearnModel(), + argv=argv + + [ + f"--output={output_file}", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras", + ], + cwd=tmp_path, + ) + == 0 + ) # Check that the output DL2 file was created assert output_file.exists(), "Output DL2 file not created" - # Check that the created DL2 file can be read with the TableLoader - with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: + # Check that the created DL2 file can be read with the TableLoader + with TableLoader( + output_file, pointing=True, focal_length_choice="EQUIVALENT" + ) as loader: # Check telescope-wise data - tel_events = loader.read_telescope_events_by_id(telescopes=available_tels, dl1_parameters=True, dl2=True) + tel_events = loader.read_telescope_events_by_id( + telescopes=available_tels, dl1_parameters=True, dl2=True + ) for tel_id in available_tels: assert len(tel_events[tel_id]) > 0 for col in REQUIRED_COLUMNS + [ @@ -95,20 +108,31 @@ def test_predict_mono_model_with_r1_waveforms(tmp_path, ctlearn_trained_r1_mono_ "telescope_pointing_azimuth", "telescope_pointing_altitude", ]: - assert col in tel_events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" - assert tel_events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + assert ( + col in tel_events[tel_id].colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + tel_events[tel_id][col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" # Check subarray-wise data subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) assert len(subarray_events) > 0 for col in REQUIRED_COLUMNS: if "_tel_" in col: continue - assert col in subarray_events.colnames, f"{col} 
missing in DL2 file {output_file.name}" - assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + assert ( + col in subarray_events.colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + subarray_events[col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" + @pytest.mark.verifies_usecase("DPPS-UC-130-1.2.2") @pytest.mark.parametrize("dl2_tel_flag", ["dl2-telescope", "no-dl2-telescope"]) -def test_predict_mono_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_mono_models, dl1_gamma_file, dl2_tel_flag): +def test_predict_mono_model_with_dl1_images( + tmp_path, ctlearn_trained_dl1_mono_models, dl1_gamma_file, dl2_tel_flag +): """ Test training CTLearn model using the DL1 gamma and proton files for all reconstruction tasks and predicting DL2 from DL1 images. Each test run gets its own isolated temp directories. @@ -123,18 +147,21 @@ def test_predict_mono_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_mono_m telescope_types = { "LST": [2], "MST": [7, 13, 15, 16, 17, 19], - #"SST": [30, 37, 43, 44, 53], + # "SST": [30, 37, 43, 44, 53], } image_mapper_types = { "LST": "BilinearMapper", "MST": "OversamplingMapper", - #"SST": "SquareMapper", + # "SST": "SquareMapper", } # Hardcopy the trained models to the model directory for telescope_type in telescope_types.keys(): for reco_task in ["type", "energy", "cameradirection"]: key = f"{telescope_type}_{reco_task}" - shutil.copy(ctlearn_trained_dl1_mono_models[key], model_dir / f"ctlearn_mono_model_{key}.keras") + shutil.copy( + ctlearn_trained_dl1_mono_models[key], + model_dir / f"ctlearn_mono_model_{key}.keras", + ) model_file = model_dir / f"ctlearn_mono_model_{key}.keras" assert model_file.exists(), f"Trained mono model file not found for {key}" # Build command-line arguments @@ -147,27 +174,37 @@ def test_predict_mono_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_mono_m f"--{dl2_tel_flag}", ] for telescope_type, allowed_tels in telescope_types.items(): - output_file = dl2_dir / f"gamma_{dl2_tel_flag}_{telescope_type}_mono_from_images.dl2.h5" + output_file = ( + dl2_dir / f"gamma_{dl2_tel_flag}_{telescope_type}_mono_from_images.dl2.h5" + ) # Run Prediction tool - assert run_tool( - MonoPredictCTLearnModel(), - argv = argv + [ - f"--output={output_file}", - f"--DLImageReader.allowed_tels={allowed_tels}", - f"--DLImageReader.image_mapper_type={image_mapper_types[telescope_type]}", - f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras", - f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras", - f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras", - ], - cwd=tmp_path - ) == 0 + assert ( + run_tool( + MonoPredictCTLearnModel(), + argv=argv + + [ + f"--output={output_file}", + f"--DLImageReader.allowed_tels={allowed_tels}", + f"--DLImageReader.image_mapper_type={image_mapper_types[telescope_type]}", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_cameradirection_model_from={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras", + ], + cwd=tmp_path, + ) + == 0 + ) # Check that the output DL2 file was created assert output_file.exists(), "Output DL2 file not 
created" - # Check that the created DL2 file can be read with the TableLoader - with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: + # Check that the created DL2 file can be read with the TableLoader + with TableLoader( + output_file, pointing=True, focal_length_choice="EQUIVALENT" + ) as loader: # Check telescope-wise data - tel_events = loader.read_telescope_events_by_id(telescopes=allowed_tels, dl1_parameters=True, dl2=True) + tel_events = loader.read_telescope_events_by_id( + telescopes=allowed_tels, dl1_parameters=True, dl2=True + ) for tel_id in allowed_tels: assert len(tel_events[tel_id]) > 0 for col in REQUIRED_COLUMNS + [ @@ -179,19 +216,30 @@ def test_predict_mono_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_mono_m ]: if dl2_tel_flag == "no-dl2-telescope" and "_tel_" in col: continue - assert col in tel_events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" - assert tel_events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + assert ( + col in tel_events[tel_id].colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + tel_events[tel_id][col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" # Check subarray-wise data subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) assert len(subarray_events) > 0 for col in REQUIRED_COLUMNS: if "_tel_" in col: continue - assert col in subarray_events.colnames, f"{col} missing in DL2 file {output_file.name}" - assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + assert ( + col in subarray_events.colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + subarray_events[col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" + @pytest.mark.verifies_usecase("DPPS-UC-130-1.2.2") -def test_predict_stereo_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_stereo_models, dl1_gamma_file): +def test_predict_stereo_model_with_dl1_images( + tmp_path, ctlearn_trained_dl1_stereo_models, dl1_gamma_file +): """ Test training CTLearn stereo model using the DL1 gamma and proton files for all reconstruction tasks and predicting DL2 from DL1 images. Each test run gets its own isolated temp directories. 
@@ -205,11 +253,14 @@ def test_predict_stereo_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_ster # Define telescope types and their available telescopes telescope_type = "MST" allowed_tels = [7, 13, 15] - + # Hardcopy the trained models to the model directory for reco_task in ["type", "energy", "skydirection"]: key = f"{telescope_type}_{reco_task}" - shutil.copy(ctlearn_trained_dl1_stereo_models[key], model_dir / f"ctlearn_stereo_model_{key}.keras") + shutil.copy( + ctlearn_trained_dl1_stereo_models[key], + model_dir / f"ctlearn_stereo_model_{key}.keras", + ) model_file = model_dir / f"ctlearn_stereo_model_{key}.keras" assert model_file.exists(), f"Trained stereo model file not found for {key}" # Build command-line arguments @@ -225,23 +276,31 @@ def test_predict_stereo_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_ster ] output_file = dl2_dir / f"gamma_{telescope_type}_stereo_from_images.dl2.h5" # Run Prediction tool - assert run_tool( - StereoPredictCTLearnModel(), - argv = argv + [ - f"--output={output_file}", - f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_type.keras", - f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_energy.keras", - f"--PredictCTLearnModel.load_skydirection_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_skydirection.keras", - ], - cwd=tmp_path - ) == 0 + assert ( + run_tool( + StereoPredictCTLearnModel(), + argv=argv + + [ + f"--output={output_file}", + f"--PredictCTLearnModel.load_type_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_type.keras", + f"--PredictCTLearnModel.load_energy_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_energy.keras", + f"--PredictCTLearnModel.load_skydirection_model_from={model_dir}/ctlearn_stereo_model_{telescope_type}_skydirection.keras", + ], + cwd=tmp_path, + ) + == 0 + ) # Check that the output DL2 file was created assert output_file.exists(), "Output DL2 file not created" - # Check that the created DL2 file can be read with the TableLoader - with TableLoader(output_file, pointing=True, focal_length_choice="EQUIVALENT") as loader: + # Check that the created DL2 file can be read with the TableLoader + with TableLoader( + output_file, pointing=True, focal_length_choice="EQUIVALENT" + ) as loader: # Check telescope-wise data - tel_events = loader.read_telescope_events_by_id(telescopes=allowed_tels, dl1_parameters=True, dl2=True) + tel_events = loader.read_telescope_events_by_id( + telescopes=allowed_tels, dl1_parameters=True, dl2=True + ) for tel_id in allowed_tels: assert len(tel_events[tel_id]) > 0 for col in REQUIRED_COLUMNS + [ @@ -254,9 +313,15 @@ def test_predict_stereo_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_ster if "_tel_" in col: continue if "CTLearnCameraReconstructor" in col: - col = col.replace("CTLearnCameraReconstructor", "CTLearnSkyReconstructor") - assert col in tel_events[tel_id].colnames, f"{col} missing in DL2 file {output_file.name}" - assert tel_events[tel_id][col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" + col = col.replace( + "CTLearnCameraReconstructor", "CTLearnSkyReconstructor" + ) + assert ( + col in tel_events[tel_id].colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + tel_events[tel_id][col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" # Check subarray-wise data subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True) assert len(subarray_events) > 
0 @@ -264,6 +329,12 @@ def test_predict_stereo_model_with_dl1_images(tmp_path, ctlearn_trained_dl1_ster if "_tel_" in col: continue if "CTLearnCameraReconstructor" in col: - col = col.replace("CTLearnCameraReconstructor", "CTLearnSkyReconstructor") - assert col in subarray_events.colnames, f"{col} missing in DL2 file {output_file.name}" - assert subarray_events[col][0] is not np.nan, f"{col} has NaN values in DL2 file {output_file.name}" \ No newline at end of file + col = col.replace( + "CTLearnCameraReconstructor", "CTLearnSkyReconstructor" + ) + assert ( + col in subarray_events.colnames + ), f"{col} missing in DL2 file {output_file.name}" + assert ( + subarray_events[col][0] is not np.nan + ), f"{col} has NaN values in DL2 file {output_file.name}" diff --git a/ctlearn/tools/tests/test_train_model.py b/ctlearn/tools/tests/test_train_model.py index b32c5256..d2291652 100644 --- a/ctlearn/tools/tests/test_train_model.py +++ b/ctlearn/tools/tests/test_train_model.py @@ -5,6 +5,7 @@ from ctapipe.core import run_tool from ctlearn.tools import TrainCTLearnModel + @pytest.mark.parametrize("reco_task", ["type", "energy", "cameradirection"]) def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_path): """ @@ -36,16 +37,18 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat "--TrainCTLearnModel.n_epochs=2", "--TrainCTLearnModel.batch_size=4", "--DLImageReader.focal_length_choice=EQUIVALENT", - f"--DLImageReader.allowed_tels={allowed_tels}" + f"--DLImageReader.allowed_tels={allowed_tels}", ] # Include background only for classification task if reco_task == "type": - argv.extend([ - f"--background={background_dir}", - "--pattern-background=*.dl1.h5", - "--DLImageReader.enforce_subarray_equality=False", - ]) + argv.extend( + [ + f"--background={background_dir}", + "--pattern-background=*.dl1.h5", + "--DLImageReader.enforce_subarray_equality=False", + ] + ) # Run training assert run_tool(TrainCTLearnModel(), argv=argv, cwd=tmp_path) == 0 @@ -60,16 +63,16 @@ def test_train_ctlearn_model(reco_task, dl1_gamma_file, dl1_proton_file, tmp_pat # Read CSV and verify number of epochs log_df = pd.read_csv(log_file) num_epochs_logged = log_df.shape[0] - assert num_epochs_logged == 2, f"Expected two epochs, found {num_epochs_logged} for {reco_task}" + assert ( + num_epochs_logged == 2 + ), f"Expected two epochs, found {num_epochs_logged} for {reco_task}" # Check that val_loss column exists - assert "val_loss" in log_df.columns, ( - f"'val_loss' column missing in training_log.csv for {reco_task}" - ) + assert ( + "val_loss" in log_df.columns + ), f"'val_loss' column missing in training_log.csv for {reco_task}" val_loss = log_df["val_loss"].dropna() - assert not val_loss.empty, ( - f"'val_loss' column is empty for {reco_task}" - ) + assert not val_loss.empty, f"'val_loss' column is empty for {reco_task}" assert ((val_loss >= 0.0) & (val_loss <= 1.0)).all(), ( f"'val_loss' values out of range [0.0, 1.0] for {reco_task}: " f"{val_loss.tolist()}" - ) \ No newline at end of file + ) diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py index 5b25ab07..e4f729d2 100644 --- a/ctlearn/tools/train_model.py +++ b/ctlearn/tools/train_model.py @@ -114,9 +114,9 @@ class TrainCTLearnModel(Tool): help="List of specific file pattern for matching files in ``input_dir_background``", ).tag(config=True) - dl1dh_reader_type = ComponentName( - DLDataReader, default_value="DLImageReader" - ).tag(config=True) + dl1dh_reader_type = ComponentName(DLDataReader, 
default_value="DLImageReader").tag( + config=True + ) stack_telescope_images = Bool( default_value=False, @@ -136,9 +136,7 @@ class TrainCTLearnModel(Tool): ), ).tag(config=True) - model_type = ComponentName( - CTLearnModel, default_value="ResNet" - ).tag(config=True) + model_type = ComponentName(CTLearnModel, default_value="ResNet").tag(config=True) output_dir = Path( exits=False, @@ -151,14 +149,14 @@ class TrainCTLearnModel(Tool): reco_tasks = List( trait=CaselessStrEnum(["type", "energy", "cameradirection", "skydirection"]), - allow_none=False, + allow_none=False, help=( "List of reconstruction tasks to perform. " "'type': classification of the primary particle type; " "'energy': regression of the primary particle energy; " "'cameradirection': reconstruction of the primary particle arrival direction in camera coordinates; " "'skydirection': reconstruction of the primary particle arrival direction in sky coordinates." - ) + ), ).tag(config=True) n_epochs = Int( @@ -187,20 +185,29 @@ class TrainCTLearnModel(Tool): ).tag(config=True) optimizer = Dict( - default_value={"name": "Adam", "base_learning_rate": 0.0001, "adam_epsilon": 1.0e-8}, - help=( - "Optimizer to use for training. " - "E.g. {'name': 'Adam', 'base_learning_rate': 0.0001, 'adam_epsilon': 1.0e-8}. " - ) + default_value={ + "name": "Adam", + "base_learning_rate": 0.0001, + "adam_epsilon": 1.0e-8, + }, + help=( + "Optimizer to use for training. " + "E.g. {'name': 'Adam', 'base_learning_rate': 0.0001, 'adam_epsilon': 1.0e-8}. " + ), ).tag(config=True) lr_reducing = Dict( - default_value={"factor": 0.5, "patience": 5, "min_delta": 0.01, "min_lr": 0.000001}, + default_value={ + "factor": 0.5, + "patience": 5, + "min_delta": 0.01, + "min_lr": 0.000001, + }, allow_none=True, - help=( - "Learning rate reducing parameters for the Keras callback. " - "E.g. {'factor': 0.5, 'patience': 5, 'min_delta': 0.01, 'min_lr': 0.000001}. " - ) + help=( + "Learning rate reducing parameters for the Keras callback. " + "E.g. {'factor': 0.5, 'patience': 5, 'min_delta': 0.01, 'min_lr': 0.000001}. " + ), ).tag(config=True) random_seed = Int( @@ -209,7 +216,7 @@ class TrainCTLearnModel(Tool): "Random seed for shuffling the data " "before the training/validation split " "and after the end of an epoch." - ) + ), ).tag(config=True) save_onnx = Bool( @@ -217,17 +224,16 @@ class TrainCTLearnModel(Tool): allow_none=False, help="Set whether to save model in an ONNX file.", ).tag(config=True) - + early_stopping = Dict( default_value=None, allow_none=True, - help=( - "Early stopping parameters for the Keras callback. " - "E.g. {'monitor': 'val_loss', 'patience': 4, 'verbose': 1, 'restore_best_weights': True}. " - ) + help=( + "Early stopping parameters for the Keras callback. " + "E.g. {'monitor': 'val_loss', 'patience': 4, 'verbose': 1, 'restore_best_weights': True}. 
" + ), ).tag(config=True) - aliases = { "signal": "TrainCTLearnModel.input_dir_signal", "background": "TrainCTLearnModel.input_dir_background", @@ -258,7 +264,9 @@ def setup(self): self.input_url_background = [] if self.input_dir_background is not None: for background_pattern in self.file_pattern_background: - self.input_url_background.extend(self.input_dir_background.glob(background_pattern)) + self.input_url_background.extend( + self.input_dir_background.glob(background_pattern) + ) # Set up the data reader self.log.info("Loading data:") @@ -276,8 +284,12 @@ def setup(self): ) self.log.info("Number of events loaded: %s", self.dl1dh_reader._get_n_events()) if "type" in self.reco_tasks: - self.log.info("Number of signal events: %d", self.dl1dh_reader.n_signal_events) - self.log.info("Number of background events: %d", self.dl1dh_reader.n_bkg_events) + self.log.info( + "Number of signal events: %d", self.dl1dh_reader.n_signal_events + ) + self.log.info( + "Number of background events: %d", self.dl1dh_reader.n_bkg_events + ) # Check if the number of events is enough to form a batch if self.dl1dh_reader._get_n_events() < self.batch_size: raise ValueError( @@ -295,7 +307,10 @@ def setup(self): f"Cannot stack telescope images in mono mode. Use stereo mode for stacking." ) # Ckeck if only one telescope type is selected for stacking telescope images - if self.stack_telescope_images and len(list(self.dl1dh_reader.selected_telescopes)) > 1: + if ( + self.stack_telescope_images + and len(list(self.dl1dh_reader.selected_telescopes)) > 1 + ): raise ToolConfigurationError( f"Cannot stack telescope images from multiple telescope types. Use only one telescope type." ) @@ -310,14 +325,16 @@ def setup(self): # Shuffle the indices before the training/validation split np.random.seed(self.random_seed) np.random.shuffle(indices) - n_validation_examples = int(self.validation_split * self.dl1dh_reader._get_n_events()) + n_validation_examples = int( + self.validation_split * self.dl1dh_reader._get_n_events() + ) training_indices = indices[n_validation_examples:] validation_indices = indices[:n_validation_examples] self.training_loader = DLDataLoader( self.dl1dh_reader, training_indices, tasks=self.reco_tasks, - batch_size=self.batch_size*self.strategy.num_replicas_in_sync, + batch_size=self.batch_size * self.strategy.num_replicas_in_sync, random_seed=self.random_seed, sort_by_intensity=self.sort_by_intensity, stack_telescope_images=self.stack_telescope_images, @@ -326,7 +343,7 @@ def setup(self): self.dl1dh_reader, validation_indices, tasks=self.reco_tasks, - batch_size=self.batch_size*self.strategy.num_replicas_in_sync, + batch_size=self.batch_size * self.strategy.num_replicas_in_sync, random_seed=self.random_seed, sort_by_intensity=self.sort_by_intensity, stack_telescope_images=self.stack_telescope_images, @@ -352,23 +369,32 @@ def setup(self): csv_logger_callback = keras.callbacks.CSVLogger( filename=f"{self.output_dir}/training_log.csv", append=True ) - self.callbacks = [model_checkpoint_callback, tensorboard_callback, csv_logger_callback] - + self.callbacks = [ + model_checkpoint_callback, + tensorboard_callback, + csv_logger_callback, + ] + if self.early_stopping is not None: # EarlyStopping callback - validate_trait_dict(self.early_stopping, ["monitor", "patience", "verbose", "restore_best_weights"]) + validate_trait_dict( + self.early_stopping, + ["monitor", "patience", "verbose", "restore_best_weights"], + ) early_stopping_callback = keras.callbacks.EarlyStopping( - monitor=self.early_stopping["monitor"], 
- patience=self.early_stopping["patience"], - verbose=self.early_stopping["verbose"], - restore_best_weights=self.early_stopping["restore_best_weights"] + monitor=self.early_stopping["monitor"], + patience=self.early_stopping["patience"], + verbose=self.early_stopping["verbose"], + restore_best_weights=self.early_stopping["restore_best_weights"], ) self.callbacks.append(early_stopping_callback) # Learning rate reducing callback if self.lr_reducing is not None: # Validate the learning rate reducing parameters - validate_trait_dict(self.lr_reducing, ["factor", "patience", "min_delta", "min_lr"]) + validate_trait_dict( + self.lr_reducing, ["factor", "patience", "min_delta", "min_lr"] + ) lr_reducing_callback = keras.callbacks.ReduceLROnPlateau( monitor=monitor, factor=self.lr_reducing["factor"], @@ -380,10 +406,8 @@ def setup(self): ) self.callbacks.append(lr_reducing_callback) - - def start(self): - + # Open a strategy scope. with self.strategy.scope(): # Construct the model @@ -397,7 +421,7 @@ def start(self): # Validate the optimizer parameters validate_trait_dict(self.optimizer, ["name", "base_learning_rate"]) # Set the learning rate for the optimizer - learning_rate = self.optimizer["base_learning_rate"] + learning_rate = self.optimizer["base_learning_rate"] # Set the epsilon for the Adam optimizer adam_epsilon = None if self.optimizer["name"] == "Adam": @@ -416,7 +440,10 @@ def start(self): keras.optimizers.Adam, dict(learning_rate=learning_rate, epsilon=adam_epsilon), ), - "RMSProp": (keras.optimizers.RMSprop, dict(learning_rate=learning_rate)), + "RMSProp": ( + keras.optimizers.RMSprop, + dict(learning_rate=learning_rate), + ), "SGD": (keras.optimizers.SGD, dict(learning_rate=learning_rate)), } # Get the optimizer function and arguments @@ -425,7 +452,9 @@ def start(self): losses, metrics = self._get_losses_and_mertics(self.reco_tasks) # Compile the model self.log.info("Compiling CTLearn model.") - self.model.compile(optimizer=optimizer_fn(**optimizer_args), loss=losses, metrics=metrics) + self.model.compile( + optimizer=optimizer_fn(**optimizer_args), loss=losses, metrics=metrics + ) # Train and evaluate the model self.log.info("Training and evaluating...") @@ -439,7 +468,6 @@ def start(self): ) self.log.info("Training and evaluating finished succesfully!") - def finish(self): # Saving model weights in onnx format @@ -453,7 +481,9 @@ def finish(self): output_path = f"{self.output_dir}/ctlearn_model.onnx" tf2onnx.convert.from_keras( - self.model, input_signature=self.model.input_layer.input._type_spec, output_path=output_path + self.model, + input_signature=self.model.input_layer.input._type_spec, + output_path=output_path, ) self.log.info("ONNX model saved in %s", self.output_dir) @@ -502,12 +532,16 @@ def _get_losses_and_mertics(self, tasks): losses["cameradirection"] = keras.losses.MeanAbsoluteError( reduction="sum_over_batch_size" ) - metrics["cameradirection"] = keras.metrics.MeanAbsoluteError(name="mae_cameradirection") + metrics["cameradirection"] = keras.metrics.MeanAbsoluteError( + name="mae_cameradirection" + ) if "skydirection" in self.reco_tasks: losses["skydirection"] = keras.losses.MeanAbsoluteError( reduction="sum_over_batch_size" ) - metrics["skydirection"] = keras.metrics.MeanAbsoluteError(name="mae_skydirection") + metrics["skydirection"] = keras.metrics.MeanAbsoluteError( + name="mae_skydirection" + ) return losses, metrics From 2c9dc65914b9c0e68fbe348be97f069198ed581b Mon Sep 17 00:00:00 2001 From: TjarkMiener Date: Fri, 23 Jan 2026 09:43:07 +0100 Subject: [PATCH 
23/27] bump ctapipe and dl1dh

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 107325d7..bab81d9f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -28,7 +28,7 @@ classifiers = [

 requires-python = ">=3.12"
 dependencies = [
-    "dl1_data_handler>=0.14.5",
+    "dl1_data_handler>=0.14.8",
     "astropy",
     "numpy",
     "pandas",
@@ -39,7 +39,7 @@ dependencies = [
     "tensorflow>=2.16",
     "pydot",
     "setuptools",
-    "ctapipe[all]>=0.28",
+    "ctapipe[all]>=0.29",
 ]
 dynamic = ["version"]

From 59303ddd195c13bb2b1118eae2d3180fa2808ecd Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Fri, 23 Jan 2026 13:56:48 +0100
Subject: [PATCH 24/27] updated installation for IT-cluster

---
 README.rst                   |  4 +--
 docs/source/installation.rst | 60 +++++++++++++-----------------------
 2 files changed, 24 insertions(+), 40 deletions(-)

diff --git a/README.rst b/README.rst
index f69dbb25..c5c4853f 100644
--- a/README.rst
+++ b/README.rst
@@ -37,7 +37,7 @@ First, create and activate a fresh conda environment:
 .. code-block:: bash

     mamba create -n ctlearn -c conda-forge python==3.12 llvmlite
-    mamba activate ctlearn
+    conda activate ctlearn

 The latest version of this package can be installed as a pip package:

 .. code-block:: bash

     pip install ctlearn

-See the documentation for further information like `installation instructions for developers `_, `package usage `_, and `dependencies `_ among other topics.
+See the documentation for further information like `installation instructions for the IT-cluster `_, `installation instructions for developers `_, `package usage `_, and `dependencies `_ among other topics.

 Citing this software
 --------------------

diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index 055ec254..a4da1b33 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -11,63 +11,47 @@
 The following command will set up a conda virtual environment,
 add the necessary package channels, and install the specified
 CTLearn version and its dependencies:

 .. code-block:: bash
+
+   mamba create -n ctlearn -c conda-forge python==3.12 llvmlite
+   conda activate ctlearn
+   pip install ctlearn

-   CTLEARN_VER=0.10.3
-   wget https://raw.githubusercontent.com/ctlearn-project/ctlearn/v$CTLEARN_VER/environment.yml
-   conda env create -n [ENVIRONMENT_NAME] -f environment.yml
-   conda activate [ENVIRONMENT_NAME]
-   pip install ctlearn==$CTLEARN_VER
-   ctlearn -h

+For working on the IT-cluster, create an environment that also provides the compiler toolchain and numerical libraries:
+
+.. code-block:: bash
+
+   mamba create -n ctlearn-it-cluster -c conda-forge python==3.12 h5py scipy llvmlite gcc_linux-64 gxx_linux-64 openblas gfortran_linux-64
+   conda activate ctlearn-it-cluster
+   export CC=$(which x86_64-conda-linux-gnu-gcc)
+   export CXX=$(which x86_64-conda-linux-gnu-g++)
+   export FC=$(which x86_64-conda-linux-gnu-gfortran)
+   pip install ctlearn

-This should automatically install all dependencies (NOTE: this may take some time, as by default MKL is included as a dependency of NumPy and it is very large).

+Do not forget to update your LD_LIBRARY_PATH to include the necessary paths. For example, add the following line to your .bashrc file:
+
+.. code-block:: bash
+
+   export LD_LIBRARY_PATH=/to/your/.conda/envs/ctlearn-it-cluster/lib:/fefs/aswg/workspace/tjark.miener/cudnn-linux-x86_64-8.9.7.29_cuda12-archive/lib:$LD_LIBRARY_PATH
+
+Note: replace /to/your/.conda/envs/ctlearn-it-cluster/lib with the path to the conda environment where ctlearn is installed; cudnn-linux-x86_64-8.9.7.29_cuda12-archive is the path to the cuDNN libraries for CUDA 12.
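+
+As a quick sanity check, you can verify that TensorFlow detects the GPU once these paths are set (a minimal sketch; it assumes only that TensorFlow is importable in the activated environment):
+
+.. code-block:: python
+
+   import tensorflow as tf
+
+   # An empty list means the CUDA/cuDNN libraries were not picked up.
+   print(tf.config.list_physical_devices("GPU"))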

-For working on the IT-cluster, please do not forget to update your LD_LIBRARY_PATH to include the necessary paths. For example, you can add the following line to your .bashrc file:
-export LD_LIBRARY_PATH=/to/your/.conda/envs/ctlearn/lib:/fefs/aswg/workspace/tjark.miener/cudnn-linux-x86_64-8.9.2.26_cuda11-archive/lib:/fefs/aswg/workspace/tjark.miener/cudnn-linux-x86_64-8.9.7.29_cuda12-archive/lib:$LD_LIBRARY_PATH
-Note: You would need to replace the /to/your/.conda/envs/ctlearn/lib with the path to your conda environment where ctlearn is installed. cudnn-linux-x86_64-8.9.2.26_cuda11-archive and cudnn-linux-x86_64-8.9.7.29_cuda12-archive are the paths to the cuDNN libraries for CUDA 11 and CUDA 12, respectively.

 Installing with pip/setuptools from source for development
 ----------------------------------------------------------

-Clone the CTLearn repository:
+First, install Anaconda by following the instructions above. Create a new conda environment that includes all the dependencies for CTLearn. Then, clone the CTLearn repository and install CTLearn into the new conda environment with pip from source:

 .. code-block:: bash

+   mamba create -n ctlearn -c conda-forge python==3.12 llvmlite
+   conda activate ctlearn
     cd 
     git clone https://github.com/ctlearn-project/ctlearn.git
+   pip install -e .

-First, install Anaconda by following the instructions above. Create a new conda environment that includes all the dependencies for CTLearn:
-
-.. code-block:: bash
-
-   conda env create -f /ctlearn/environment.yml
-
-Finally, install CTLearn into the new conda environment via pypi:
-
-.. code-block:: bash
-
-   conda activate ctlearn
-   pip install ctlearn==0.10.3
-
-or with pip from source:
-
-.. code-block:: bash
-
-   conda activate ctlearn
-
-   cd /ctlearn
-   pip install .
-
-NOTE for developers: If you wish to fork/clone the repository and edit the code, install with ``pip -e``.

 Dependencies
 ------------

-* Python>=3.10
-* TensorFlow>=2.14,<2.15
-* ctapipe>=0.22.0,<0.26
+* Python>=3.12
+* TensorFlow>=2.16
+* ctapipe>=0.29.0
 * ctaplot
-* DL1DataHandler>=0.14.5,<0.15
+* DL1DataHandler>=0.14.8
 * numba
 * NumPy
 * Pandas
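After the editable install above (pip install -e .), a quick way to confirm that the package is importable and which version was derived from the git checkout (a minimal sketch; it assumes only that the install succeeded):

    import ctlearn

    # The version is generated dynamically from the git checkout via setuptools_scm.
    print(ctlearn.__version__)

From 3a4b3c2d937a165bc2818d108f1228e5f3897ff4 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Fri, 23 Jan 2026 14:02:38 +0100
Subject: [PATCH 25/27] updated Dockerfile with tensorflow:25.02-tf2-py3

---
 Dockerfile | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 72881b4a..4a08b151 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
 # Stage 1: Build the ctlearn wheel using a standard Python image
-FROM python:3.11 AS builder
+FROM python:3.12 AS builder

 # Install git (needed for setuptools_scm during build) and build tool
 RUN apt-get update \
@@ -18,9 +18,7 @@ COPY ./.git ./.git/
 RUN python -m build --wheel

 # Stage 2: Create the final runtime image BASED ON NVIDIA's TF image
-# TODO what version to use ? after 24.?? TF 2.14 is not found in the container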
-FROM nvcr.io/nvidia/tensorflow:24.01-tf2-py3
-
+FROM nvcr.io/nvidia/tensorflow:25.02-tf2-py3

 # Copy only the built wheel from the builder stage's dist directory
 COPY --from=builder /repo/dist /tmp/dist

From 3af6b5a2d7e34e7e40807c33a62b4bef097e1fd1 Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Fri, 23 Jan 2026 16:45:06 +0100
Subject: [PATCH 26/27] bug fix keras3 upgrade

---
 ctlearn/tools/predict_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ctlearn/tools/predict_model.py b/ctlearn/tools/predict_model.py
index 0fb238f9..dba16638 100644
--- a/ctlearn/tools/predict_model.py
+++ b/ctlearn/tools/predict_model.py
@@ -739,7 +739,7 @@ def _predict_with_model(self, model_path):
         backbone_model = model.get_layer(index=1)
         # Create a new head model with the same layers as the original model.
         # The output of the backbone model is the input of the head model.
-        backbone_output_shape = keras.Input(model.layers[2].input_shape[1:])
+        backbone_output_shape = keras.Input(model.layers[2].input.shape[1:])
         x = backbone_output_shape
         for layer in model.layers[2:]:
             x = layer(x)
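A note on the one-line fix above: Keras 3 removed the input_shape attribute that Keras 2 exposed on layer inputs; the per-example shape is now read from the input tensor itself via layer.input.shape. A minimal sketch of the pattern used in _predict_with_model (toy layer sizes, not the real CTLearn architecture):

    import keras

    # Toy stand-in for a loaded model: input layer, backbone, head.
    inputs = keras.Input(shape=(16,))
    x = keras.layers.Dense(8, name="backbone")(inputs)
    outputs = keras.layers.Dense(2, name="head")(x)
    model = keras.Model(inputs, outputs)

    # Keras 3 style: take the head's input shape from the tensor,
    # dropping the batch axis, and rebuild the head on a fresh Input.
    head_input = keras.Input(model.layers[2].input.shape[1:])
    y = head_input
    for layer in model.layers[2:]:
        y = layer(y)
    head_model = keras.Model(head_input, y)

From 8536cd823cbf05a5278d46a283b6e010fff27e Mon Sep 17 00:00:00 2001
From: TjarkMiener
Date: Fri, 23 Jan 2026 16:46:22 +0100
Subject: [PATCH 27/27] added lstchain LST1 real data mock and test also
 LST1PredictionTool in CI

---
 ctlearn/conftest.py                      |  93 ++++++++++++++
 ctlearn/tools/predict_LST1.py            | 148 +++++++++++++----------
 ctlearn/tools/tests/test_predict_LST1.py | 122 +++++++++++++++++++
 3 files changed, 300 insertions(+), 63 deletions(-)
 create mode 100644 ctlearn/tools/tests/test_predict_LST1.py

diff --git a/ctlearn/conftest.py b/ctlearn/conftest.py
index 20ce564e..cdfcdb18 100644
--- a/ctlearn/conftest.py
+++ b/ctlearn/conftest.py
@@ -2,13 +2,20 @@
 common pytest fixtures for tests in ctlearn.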
""" +from pathlib import Path + +import numpy as np import pytest import shutil +from astropy import units as u +from astropy.table import Column, Table from traitlets.config.loader import Config from ctapipe.core import run_tool +from ctapipe.io import write_table from ctapipe.utils import get_dataset_path from ctlearn.tools import TrainCTLearnModel +from ctlearn.utils import get_lst1_subarray_description @pytest.fixture(scope="session") @@ -35,6 +42,92 @@ def r1_tmp_path(tmp_path_factory): return tmp_path_factory.mktemp("r1_") +def _create_mock_lst1_dl1_file( + output_path: str | Path, + n_events: int = 4, + random_seed: int | None = 0, +) -> Path: + """Write a minimal DL1 file with fake data compatible with the LST1PredictionTool.""" + + rng = np.random.default_rng(random_seed) + output_path = Path(output_path) + + subarray = get_lst1_subarray_description() + tel_id = 1 + n_pixels = subarray.tel[tel_id].camera.geometry.n_pixels + + obs_id = 1 + event_ids = np.arange(1, n_events + 1, dtype=np.int64) + + # Fake per-pixel data + image = rng.uniform(80, 150, size=(n_events, n_pixels)).astype(np.float32) + image_mask = rng.integers(0, 2, size=(n_events, n_pixels), dtype=bool) + peak_time = rng.normal(5.0, 0.5, size=(n_events, n_pixels)).astype(np.float32) + + image_table = Table() + image_table["obs_id"] = np.full(n_events, obs_id, dtype=np.int64) + image_table["event_id"] = event_ids + image_table["tel_id"] = np.full(n_events, tel_id, dtype=np.int16) + image_table.add_column( + Column(image, name="image", dtype=np.float32, shape=(n_pixels,)) + ) + image_table.add_column( + Column(image_mask, name="image_mask", dtype=bool, shape=(n_pixels,)) + ) + image_table.add_column( + Column(peak_time, name="peak_time", dtype=np.float32, shape=(n_pixels,)) + ) + + # DL1 parameter columns required by LST1PredictionTool + parameter_table = Table() + parameter_table["obs_id"] = np.full(n_events, obs_id, dtype=np.int64) + parameter_table["event_id"] = event_ids + parameter_table["tel_id"] = np.full(n_events, tel_id, dtype=np.int16) + parameter_table["intensity"] = rng.uniform(90, 140, size=n_events) + parameter_table["x"] = rng.normal(0.0, 0.05, size=n_events) + parameter_table["y"] = rng.normal(0.0, 0.05, size=n_events) + parameter_table["phi"] = rng.uniform(-np.pi, np.pi, size=n_events) + parameter_table["psi"] = rng.uniform(-np.pi, np.pi, size=n_events) + parameter_table["length"] = rng.uniform(0.05, 0.15, size=n_events) + parameter_table["length_uncertainty"] = rng.uniform(0.001, 0.003, size=n_events) + parameter_table["width"] = rng.uniform(0.02, 0.08, size=n_events) + parameter_table["width_uncertainty"] = rng.uniform(0.001, 0.003, size=n_events) + parameter_table["skewness"] = rng.normal(0.0, 0.2, size=n_events) + parameter_table["kurtosis"] = rng.normal(0.0, 0.2, size=n_events) + parameter_table["time_gradient"] = rng.normal(0.0, 0.01, size=n_events) + parameter_table["intercept"] = rng.normal(0.0, 0.01, size=n_events) + parameter_table["n_pixels"] = np.full(n_events, n_pixels, dtype=np.int16) + parameter_table["n_islands"] = np.zeros(n_events, dtype=np.int16) + parameter_table["event_type"] = np.full(n_events, 32, dtype=np.int16) + parameter_table["az_tel"] = np.full(n_events, 1.0) + parameter_table["alt_tel"] = np.full(n_events, 1.2) + parameter_table["dragon_time"] = np.linspace(1_700_000_000, 1_700_000_300, n_events) + + # Write to DL1 file the subarray description, image and parameter tables + subarray.to_hdf(output_path, overwrite=True) + write_table( + image_table, + output_path, + 
"/dl1/event/telescope/image/LST_LSTCam", + overwrite=True, + ) + write_table( + parameter_table, + output_path, + "/dl1/event/telescope/parameters/LST_LSTCam", + overwrite=True, + ) + return output_path + + +@pytest.fixture(scope="session") +def mock_lst1_dl1_file(tmp_path_factory): + """Path to a session-scoped mock LST-1 DL1 HDF5 file for tests.""" + + output = tmp_path_factory.mktemp("mock_lst1") / "mock_lst1_dl1.h5" + return _create_mock_lst1_dl1_file(output) + + @pytest.fixture(scope="session") def dl1_gamma_file(dl1_tmp_path, gamma_simtel_path): """ diff --git a/ctlearn/tools/predict_LST1.py b/ctlearn/tools/predict_LST1.py index a4751d9a..58069e56 100644 --- a/ctlearn/tools/predict_LST1.py +++ b/ctlearn/tools/predict_LST1.py @@ -6,8 +6,7 @@ import tables import keras from astropy import units as u -from astropy.coordinates.earth import EarthLocation -from astropy.coordinates import AltAz, Angle, SkyCoord +from astropy.coordinates import AltAz, SkyCoord from astropy.table import Table, join, setdiff, vstack from astropy.time import Time @@ -27,26 +26,36 @@ flag, CaselessStrEnum, ComponentName, - Unicode, + Dict, UseEnum, classes_with_traits, ) from ctapipe.instrument.optics import FocalLengthKind from ctapipe.io import read_table, write_table +from ctapipe.io.hdf5dataformat import ( + DL1_SUBARRAY_TRIGGER_TABLE, + DL1_TEL_GROUP, + DL1_TEL_PARAMETERS_GROUP, + DL1_TEL_POINTING_GROUP, + DL1_TEL_TRIGGER_TABLE, + DL2_TEL_PARTICLETYPE_GROUP, + DL2_TEL_ENERGY_GROUP, + DL2_TEL_GEOMETRY_GROUP, + DL2_SUBARRAY_PARTICLETYPE_GROUP, + DL2_SUBARRAY_ENERGY_GROUP, + DL2_SUBARRAY_GEOMETRY_GROUP, +) from ctapipe.reco.utils import add_defaults_and_meta -from ctlearn.core.model import LoadedModel -from ctlearn.utils import get_lst1_subarray_description +from ctlearn import __version__ as ctlearn_version +from ctlearn.utils import get_lst1_subarray_description, validate_trait_dict + from dl1_data_handler.image_mapper import ImageMapper from dl1_data_handler.reader import ( get_unmapped_image, TableQualityQuery, ) -POINTING_GROUP = "/dl1/monitoring/telescope/pointing" -DL1_TELESCOPE_GROUP = "/dl1/event/telescope" -DL2_TELESCOPE_GROUP = "/dl2/event/telescope" -DL2_SUBARRAY_GROUP = "/dl2/event/subarray" SUBARRAY_EVENT_KEYS = ["obs_id", "event_id"] TELESCOPE_EVENT_KEYS = ["obs_id", "event_id", "tel_id"] @@ -111,10 +120,18 @@ class LST1PredictionTool(Tool): help="Set whether to include dl2 subarray-event-wise data in the output file.", ).tag(config=True) - prefix = Unicode( - default_value="CTLearn", + prefixes = Dict( + default_value={ + "type": "CTLearnClassifier", + "energy": "CTLearnRegressor", + "cameradirection": "CTLearnCameraReconstructor", + "all": "CTLearn", + }, allow_none=False, - help="Name of the reconstruction algorithm used to generate the dl2 data.", + help=( + "Name of the reconstruction algorithm used " + "to generate the dl2 data for each task." 
+ ), ).tag(config=True) load_type_model_from = Path( @@ -260,6 +277,11 @@ class LST1PredictionTool(Tool): classes = classes_with_traits(ImageMapper) def setup(self): + self.log.info("ctlearn version %s", ctlearn_version) + # Validate the prefixes trait dictionary + validate_trait_dict( + self.prefixes, ["type", "energy", "cameradirection", "all"] + ) # Save dl1 image and parameters tree schemas and tel id for easy access self.image_table_path = "/dl1/event/telescope/image/LST_LSTCam" self.parameter_table_name = "/dl1/event/telescope/parameters/LST_LSTCam" @@ -371,13 +393,13 @@ def start(self): write_table( pointing_table, self.output_path, - f"{POINTING_GROUP}/tel_{self.tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL1 telescope pointing table was stored in '%s' under '%s'", self.output_path, - f"{POINTING_GROUP}/tel_{self.tel_id:03d}", + f"{DL1_TEL_POINTING_GROUP}/tel_{self.tel_id:03d}", ) # Set the time format to MJD since in the other table we store the time in MJD time.format = "mjd" @@ -390,13 +412,13 @@ def start(self): write_table( trigger_table, self.output_path, - "/dl1/event/telescope/trigger", + DL1_TEL_TRIGGER_TABLE, overwrite=self.overwrite, ) self.log.info( "DL1 telescope trigger table was stored in '%s' under '%s'", self.output_path, - "/dl1/event/telescope/trigger", + DL1_TEL_TRIGGER_TABLE, ) trigger_table.keep_columns(["obs_id", "event_id", "time"]) trigger_table.add_column( @@ -407,13 +429,13 @@ def start(self): write_table( trigger_table, self.output_path, - "/dl1/event/subarray/trigger", + DL1_SUBARRAY_TRIGGER_TABLE, overwrite=self.overwrite, ) self.log.info( "DL1 subarray trigger table was stored in '%s' under '%s'", self.output_path, - "/dl1/event/subarray/trigger", + DL1_SUBARRAY_TRIGGER_TABLE, ) # Create the dl1 parameters table parameter_table.rename_column("intensity", "hillas_intensity") @@ -457,13 +479,13 @@ def start(self): write_table( parameter_table, self.output_path, - f"/dl1/event/telescope/parameters/tel_{self.tel_id:03d}", + f"{DL1_TEL_PARAMETERS_GROUP}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL1 parameters table was stored in '%s' under '%s'", self.output_path, - f"/dl1/event/telescope/parameters/tel_{self.tel_id:03d}", + f"{DL1_TEL_PARAMETERS_GROUP}/tel_{self.tel_id:03d}", ) # Add additional columns to the parameter table @@ -513,7 +535,7 @@ def start(self): # Get the unmapped image image = get_unmapped_image(event, self.channels, self.transforms) data.append(self.image_mapper.map_image(image)) - input_data = {"input": np.array(data)} + input_data = np.array(data) event_id.extend(dl1_table["event_id"].data) tel_azimuth.extend(dl1_table["tel_az"].data) @@ -555,27 +577,27 @@ def start(self): if self.load_type_model_from is not None: classification_tel_table = example_identifiers.copy() classification_tel_table.add_column( - prediction, name=f"{self.prefix}_tel_prediction" + prediction, name=f"{self.prefixes['type']}_tel_prediction" ) # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: nan_table = self._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_prediction"], + columns=[f"{self.prefixes['type']}_tel_prediction"], shapes=[(len(nonexample_identifiers),)], ) classification_tel_table = vstack([classification_tel_table, nan_table]) classification_tel_table.sort(TELESCOPE_EVENT_KEYS) - classification_is_valid = ~np.isnan(classification_tel_table[f"{self.prefix}_tel_prediction"].data, dtype=bool) + 
classification_is_valid = ~np.isnan(classification_tel_table[f"{self.prefixes['type']}_tel_prediction"].data, dtype=bool) classification_tel_table.add_column( classification_is_valid, - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['type']}_tel_is_valid", ) # Add the default values and meta data to the table add_defaults_and_meta( classification_tel_table, ParticleClassificationContainer, - prefix=self.prefix, + prefix=self.prefixes['type'], add_tel_prefix=True, ) # Save the prediction to the output file @@ -583,13 +605,13 @@ def start(self): write_table( classification_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefixes['type']}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/classification/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_PARTICLETYPE_GROUP}/{self.prefixes['type']}/tel_{self.tel_id:03d}", ) # Write the mono telescope prediction to the subarray prediction table if self.dl2_subarray: @@ -601,19 +623,19 @@ def start(self): colname, colname.replace("_tel", "") ) classification_subarray_table.add_column( - [[val] for val in classification_is_valid], name=f"{self.prefix}_telescopes" + [[val] for val in classification_is_valid], name=f"{self.prefixes['type']}_telescopes" ) # Save the prediction to the output file write_table( classification_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/classification/{self.prefix}", + f"{DL2_SUBARRAY_PARTICLETYPE_GROUP}/{self.prefixes['type']}", ) # Adding the feature vectors for the classification if self.dl1_features: @@ -622,10 +644,10 @@ def start(self): ) feature_vector_table.add_column( classification_fvs, - name=f"{self.prefix}_tel_classification_feature_vectors", + name=f"{self.prefixes['all']}_tel_classification_feature_vectors", ) if nonexample_identifiers is not None: - fvs_columns_list.append(f"{self.prefix}_tel_classification_feature_vectors") + fvs_columns_list.append(f"{self.prefixes['all']}_tel_classification_feature_vectors") fvs_shapes_list.append( ( len(nonexample_identifiers), @@ -637,26 +659,26 @@ def start(self): # Convert the reconstructed energy from log10(TeV) to TeV reco_energy = u.Quantity(np.power(10, np.squeeze(energy)), unit=u.TeV) # Add the reconstructed energy to the prediction table - energy_tel_table.add_column(reco_energy, name=f"{self.prefix}_tel_energy") + energy_tel_table.add_column(reco_energy, name=f"{self.prefixes['energy']}_tel_energy") # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: nan_table = self._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_energy"], + columns=[f"{self.prefixes['energy']}_tel_energy"], shapes=[(len(nonexample_identifiers),)], ) energy_tel_table = vstack([energy_tel_table, nan_table]) energy_tel_table.sort(TELESCOPE_EVENT_KEYS) - energy_is_valid = ~np.isnan(energy_tel_table[f"{self.prefix}_tel_energy"].data, dtype=bool) + energy_is_valid = ~np.isnan(energy_tel_table[f"{self.prefixes['energy']}_tel_energy"].data, dtype=bool) energy_tel_table.add_column( energy_is_valid, - name=f"{self.prefix}_tel_is_valid", + 
name=f"{self.prefixes['energy']}_tel_is_valid", ) # Add the default values and meta data to the table add_defaults_and_meta( energy_tel_table, ReconstructedEnergyContainer, - prefix=self.prefix, + prefix=self.prefixes['energy'], add_tel_prefix=True, ) # Save the prediction to the output file @@ -664,13 +686,13 @@ def start(self): write_table( energy_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_ENERGY_GROUP}/{self.prefixes['energy']}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/energy/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_ENERGY_GROUP}/{self.prefixes['energy']}/tel_{self.tel_id:03d}", ) # Write the mono telescope prediction to the subarray prediction table if self.dl2_subarray: @@ -682,19 +704,19 @@ def start(self): colname, colname.replace("_tel", "") ) energy_subarray_table.add_column( - [[val] for val in energy_is_valid], name=f"{self.prefix}_telescopes" + [[val] for val in energy_is_valid], name=f"{self.prefixes['energy']}_telescopes" ) # Save the prediction to the output file write_table( energy_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/energy/{self.prefix}", + f"{DL2_SUBARRAY_ENERGY_GROUP}/{self.prefixes['energy']}", ) # Adding the feature vectors for the energy regression if self.dl1_features: @@ -703,10 +725,10 @@ def start(self): ) feature_vector_table.add_column( energy_fvs, - name=f"{self.prefix}_tel_energy_feature_vectors", + name=f"{self.prefixes['all']}_tel_energy_feature_vectors", ) if nonexample_identifiers is not None: - fvs_columns_list.append(f"{self.prefix}_tel_energy_feature_vectors") + fvs_columns_list.append(f"{self.prefixes['all']}_tel_energy_feature_vectors") fvs_shapes_list.append( ( len(nonexample_identifiers), @@ -747,34 +769,34 @@ def start(self): reco_direction = cam_coord_offset.transform_to(altaz) # Add the reconstructed direction (az, alt) to the prediction table direction_tel_table.add_column( - reco_direction.az.to(u.deg), name=f"{self.prefix}_tel_az" + reco_direction.az.to(u.deg), name=f"{self.prefixes['cameradirection']}_tel_az" ) direction_tel_table.add_column( - reco_direction.alt.to(u.deg), name=f"{self.prefix}_tel_alt" + reco_direction.alt.to(u.deg), name=f"{self.prefixes['cameradirection']}_tel_alt" ) # Produce output table with NaNs for missing predictions if len(nonexample_identifiers) > 0: nan_table = self._create_nan_table( nonexample_identifiers, - columns=[f"{self.prefix}_tel_az", f"{self.prefix}_tel_alt"], + columns=[f"{self.prefixes['cameradirection']}_tel_az", f"{self.prefixes['cameradirection']}_tel_alt"], shapes=[(len(nonexample_identifiers),), (len(nonexample_identifiers),)], ) direction_tel_table = vstack([direction_tel_table, nan_table]) direction_tel_table.keep_columns( TELESCOPE_EVENT_KEYS - + [f"{self.prefix}_tel_az", f"{self.prefix}_tel_alt"] + + [f"{self.prefixes['cameradirection']}_tel_az", f"{self.prefixes['cameradirection']}_tel_alt"] ) direction_tel_table.sort(TELESCOPE_EVENT_KEYS) - direction_is_valid = ~np.isnan(direction_tel_table[f"{self.prefix}_tel_az"].data, dtype=bool) + direction_is_valid = ~np.isnan(direction_tel_table[f"{self.prefixes['cameradirection']}_tel_az"].data, dtype=bool) 
direction_tel_table.add_column( direction_is_valid, - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['cameradirection']}_tel_is_valid", ) # Add the default values and meta data to the table add_defaults_and_meta( direction_tel_table, ReconstructedGeometryContainer, - prefix=self.prefix, + prefix=self.prefixes['cameradirection'], add_tel_prefix=True, ) # Save the prediction to the output file @@ -782,13 +804,13 @@ def start(self): write_table( direction_tel_table, self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_TELESCOPE_GROUP}/geometry/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL2_TEL_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}/tel_{self.tel_id:03d}", ) # Write the mono telescope prediction to the subarray prediction table if self.dl2_subarray: @@ -800,19 +822,19 @@ def start(self): colname, colname.replace("_tel", "") ) direction_subarray_table.add_column( - [[val] for val in direction_is_valid], name=f"{self.prefix}_telescopes" + [[val] for val in direction_is_valid], name=f"{self.prefixes['cameradirection']}_telescopes" ) # Save the prediction to the output file write_table( direction_subarray_table, self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}", overwrite=self.overwrite, ) self.log.info( "DL2 prediction data was stored in '%s' under '%s'", self.output_path, - f"{DL2_SUBARRAY_GROUP}/geometry/{self.prefix}", + f"{DL2_SUBARRAY_GEOMETRY_GROUP}/{self.prefixes['cameradirection']}", ) # Adding the feature vectors for the arrival direction regression if self.dl1_features: @@ -821,10 +843,10 @@ def start(self): ) feature_vector_table.add_column( direction_fvs, - name=f"{self.prefix}_tel_direction_feature_vectors", + name=f"{self.prefixes['all']}_tel_direction_feature_vectors", ) if nonexample_identifiers is not None: - fvs_columns_list.append(f"{self.prefix}_tel_direction_feature_vectors") + fvs_columns_list.append(f"{self.prefixes['all']}_tel_direction_feature_vectors") fvs_shapes_list.append( ( len(nonexample_identifiers), @@ -847,19 +869,19 @@ def start(self): # Add is_valid column to the feature vector table feature_vector_table.add_column( is_valid_col, - name=f"{self.prefix}_tel_is_valid", + name=f"{self.prefixes['all']}_tel_is_valid", ) # Save the prediction to the output file write_table( feature_vector_table, self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL1_TEL_GROUP}/features/{self.prefixes['all']}/tel_{self.tel_id:03d}", overwrite=self.overwrite, ) self.log.info( "DL1 feature vectors was stored in '%s' under '%s'", self.output_path, - f"{DL1_TELESCOPE_GROUP}/features/{self.prefix}/tel_{self.tel_id:03d}", + f"{DL1_TEL_GROUP}/features/{self.prefixes['all']}/tel_{self.tel_id:03d}", ) def finish(self): @@ -889,7 +911,7 @@ def _split_model(self, model): backbone = model.get_layer(index=1) # Create a new head model with the same layers as the original model. # The output of the backbone model is the input of the head model. 
-        backbone_output_shape = keras.Input(model.layers[2].input_shape[1:])
+        backbone_output_shape = keras.Input(model.layers[2].input.shape[1:])
         x = backbone_output_shape
         for layer in model.layers[2:]:
             x = layer(x)
diff --git a/ctlearn/tools/tests/test_predict_LST1.py b/ctlearn/tools/tests/test_predict_LST1.py
new file mode 100644
index 00000000..62969979
--- /dev/null
+++ b/ctlearn/tools/tests/test_predict_LST1.py
@@ -0,0 +1,122 @@
+import shutil
+import numpy as np
+import pytest
+
+from ctapipe.core import run_tool
+from ctapipe.io import TableLoader
+from ctlearn.tools import LST1PredictionTool
+
+# Columns that should be present in the output DL2 file
+REQUIRED_COLUMNS = [
+    "event_id",
+    "obs_id",
+    "CTLearnCameraReconstructor_tel_alt",
+    "CTLearnCameraReconstructor_alt",
+    "CTLearnCameraReconstructor_tel_az",
+    "CTLearnCameraReconstructor_az",
+    "CTLearnClassifier_tel_prediction",
+    "CTLearnClassifier_prediction",
+    "CTLearnRegressor_tel_energy",
+    "CTLearnRegressor_energy",
+    "CTLearnCameraReconstructor_tel_is_valid",
+    "CTLearnCameraReconstructor_is_valid",
+    "CTLearnClassifier_tel_is_valid",
+    "CTLearnClassifier_is_valid",
+    "CTLearnRegressor_tel_is_valid",
+    "CTLearnRegressor_is_valid",
+    "CTLearnCameraReconstructor_telescopes",
+    "CTLearnClassifier_telescopes",
+    "CTLearnRegressor_telescopes",
+]
+
+
+@pytest.mark.verifies_usecase("DPPS-UC-130-1.2.2")
+def test_predict_mono_model_with_lst1_mock_data(
+    tmp_path, ctlearn_trained_dl1_mono_models, mock_lst1_dl1_file
+):
+    """
+    Test LST1PredictionTool using trained mono models and mock LST-1 DL1 files.
+    Each test run gets its own isolated temp directories.
+    """
+
+    model_dir = tmp_path / "trained_models"
+    model_dir.mkdir(parents=True, exist_ok=True)
+
+    dl2_dir = tmp_path / "dl2_output"
+    dl2_dir.mkdir(parents=True, exist_ok=True)
+
+    # Copy the trained models to the model directory
+    telescope_type = "LST"
+    for reco_task in ["type", "energy", "cameradirection"]:
+        key = f"{telescope_type}_{reco_task}"
+        shutil.copy(
+            ctlearn_trained_dl1_mono_models[key],
+            model_dir / f"ctlearn_mono_model_{key}.keras",
+        )
+        model_file = model_dir / f"ctlearn_mono_model_{key}.keras"
+        assert model_file.exists(), f"Trained mono model file not found for {key}"
+
+    # Check that the mock LST1 DL1 file was created
+    assert mock_lst1_dl1_file.exists(), "Mock LST1 DL1 file not found"
+
+    output_file = dl2_dir / "mock_lst1_predictions.dl2.h5"
+
+    # Build command-line arguments for LST1PredictionTool
+    argv = [
+        f"--input_url={mock_lst1_dl1_file}",
+        f"--output={output_file}",
+        "--LST1PredictionTool.batch_size=2",
+        "--LST1PredictionTool.channels=cleaned_image",
+        "--LST1PredictionTool.channels=cleaned_relative_peak_time",
+        "--LST1PredictionTool.image_mapper_type=BilinearMapper",
+        f"--type_model={model_dir}/ctlearn_mono_model_{telescope_type}_type.keras",
+        f"--energy_model={model_dir}/ctlearn_mono_model_{telescope_type}_energy.keras",
+        f"--cameradirection_model={model_dir}/ctlearn_mono_model_{telescope_type}_cameradirection.keras",
+        "--dl2-telescope",
+        "--overwrite",
+    ]
+
+    # Run LST1PredictionTool
+    assert run_tool(LST1PredictionTool(), argv=argv, cwd=tmp_path) == 0
+
+    # Check that the output DL2 file was created
+    assert output_file.exists(), "Output DL2 file not created"
+
+    # Check that the created DL2 file can be read with the TableLoader
+    allowed_tels = [1]
+    with TableLoader(
+        output_file, pointing=True, focal_length_choice="EFFECTIVE"
+    ) as loader:
+        # Check telescope-wise data
+        tel_events = loader.read_telescope_events_by_id(
+            telescopes=allowed_tels, dl1_parameters=True, dl2=True
+        )
+        for tel_id in allowed_tels:
+            assert len(tel_events[tel_id]) > 0
+            for col in REQUIRED_COLUMNS + [
+                "tel_id",
+                "hillas_intensity",
+                "telescope_pointing_azimuth",
+                "telescope_pointing_altitude",
+            ]:
+                # Skip subarray-level columns; they are checked below
+                if col in REQUIRED_COLUMNS and "_tel_" not in col:
+                    continue
+                assert (
+                    col in tel_events[tel_id].colnames
+                ), f"{col} missing in DL2 file {output_file.name}"
+                # Use np.isnan: an identity test against np.nan would always
+                # pass for scalars read back from a table
+                value = np.asarray(tel_events[tel_id][col][0], dtype=float)
+                assert not np.any(
+                    np.isnan(value)
+                ), f"{col} has NaN values in DL2 file {output_file.name}"
+
+        # Check subarray-wise data
+        subarray_events = loader.read_subarray_events(start=0, stop=2, dl2=True)
+        assert len(subarray_events) > 0
+        for col in REQUIRED_COLUMNS:
+            if "_tel_" in col:
+                continue
+            assert (
+                col in subarray_events.colnames
+            ), f"{col} missing in DL2 file {output_file.name}"
+            value = np.asarray(subarray_events[col][0], dtype=float)
+            assert not np.any(
+                np.isnan(value)
+            ), f"{col} has NaN values in DL2 file {output_file.name}"
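A note on the NaN checks in the test above: the assertions use np.isnan rather than an identity comparison against np.nan, because values read back from numpy-backed tables are fresh scalar objects, so an "is not np.nan" test would always pass regardless of the value. A minimal sketch:

    import numpy as np

    arr = np.array([np.nan])
    # Indexing returns a new np.float64 object, never the np.nan singleton:
    print(arr[0] is np.nan)   # False, even though the value is NaN
    print(np.isnan(arr[0]))   # True, the robust check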