diff --git a/HighEnergyObsCoreExt.tex b/HighEnergyObsCoreExt.tex index 2a2137c..414c602 100644 --- a/HighEnergyObsCoreExt.tex +++ b/HighEnergyObsCoreExt.tex @@ -37,11 +37,11 @@ \newacronym{UHE}{UHE}{Ultra High Energy} \newacronym{HESS}{H.E.S.S.}{High Energy Stereoscopic System} \newacronym{CTAO}{CTAO}{Cherenkov Telescope Array Observatory} -\newacronym{IACT}{IACT}{imaging atmospheric Cherenkov telescopes} +\newacronym{IACT}{IACT}{Imaging Atmospheric Cherenkov Telescopes} \newacronym[plural=IRFs,firstplural=Instrument Response Functions (IRFs)]{IRF}{IRF}{Instrument Response Function} -\newacronym{PSF}{PSF}{point spread function} -\newacronym{RMF}{RMF}{redistribution matrix file} -\newacronym{ARF}{ARF}{auxiliary response file} +\newacronym{PSF}{PSF}{Point Spread Function} +\newacronym{RMF}{RMF}{Redistribution Matrix File} +\newacronym{ARF}{ARF}{Auxiliary Response File} \newacronym{ESA}{ESA}{European Space Agency} \newacronym{XMM-Newton}{XMM-Newton}{X-ray Multi-Mirror Mission} \newacronym{SSC}{SSC}{Survey Science Centre} @@ -49,7 +49,7 @@ \newacronym{ESAC}{ESAC}{European Space Astronomy Centre} \newacronym{SAS}{SAS}{scientific analysis software} \newacronym{EPIC}{EPIC}{European Photon Imaging Camera} -\newacronym{TAP}{TAP}{table access protocol} +\newacronym{TAP}{TAP}{Table Access Protocol} \newacronym{SVOM}{SVOM}{Space-based multi-band astronomical Variable Objects Monitor} \newacronym{KM3NeT}{KM3NeT}{Cubic Kilometre Neutrino Telescope} \newacronym{ORCA}{ORCA}{Oscillation Research with Cosmics in the Abyss} @@ -57,14 +57,14 @@ \newacronym{ANTARES}{ANTARES}{Astronomy with a Neutrino Telescope and Abyss Environmental Research} \newacronym{GW}{GW}{Gravitational wave} \newacronym{WCD}{WCD}{Water Cherenkov Detector} -\newacronym[plural=STIs]{STI}{STI}{stable time interval} -\newacronym[plural=GTIs]{GTI}{GTI}{good time interval} +\newacronym[plural=STIs]{STI}{STI}{Stable Time Interval} +\newacronym[plural=GTIs]{GTI}{GTI}{Good Time Interval} \newacronym{FITS}{FITS}{Flexible 
Image Transport System} \newacronym{ACIS}{ACIS}{Advanced CCD Imaging Spectrometer} \newacronym{HRC}{HRC}{High Resolution Camera} \newacronym{CXC}{CXC}{Chandra X-ray Center} \newacronym{CDA}{CDA}{Chandra Data Archive} -\newacronym{CTI}{CTI}{charge transfer efficiency} +\newacronym{CTI}{CTI}{Charge Transfer Inefficiency} \newacronym{OGIP}{OGIP}{Office of Guest Investigator Programs} \newacronym{NASA}{NASA}{National Aeronautics and Space Administration} \newacronym{HEASARC}{HEASARC}{High Energy Astrophysics Science Archive Research Center} @@ -83,7 +83,7 @@ \begin{document} \begin{abstract} -This is a proposed extension to the ObsCore specification for data description, discovery and selection of \gls{HEA} data, and includes proposed updates to the data product vocabulary, UCDs, and MIME-types to support discovery of \gls{HEA} data. +This document describes a proposed extension to the ObsCore specification for data description, discovery and selection of \gls{HEA} data. It includes proposed updates to the data product vocabulary, UCDs, and MIME-types to support discovery of \gls{HEA} data. \end{abstract} @@ -96,24 +96,25 @@ \section*{Conformance-related definitions} The words ``MUST'', ``SHALL'', ``SHOULD'', ``MAY'', ``RECOMMENDED'', and ``OPTIONAL'' (in upper or lower case) used in this document are to be interpreted as described in IETF standard RFC2119 \citep{std:RFC2119}. -The \emph{Virtual Observatory} (VO) is a general term for a collection of federated resources that can be used to conduct astronomical research, education, and outreach. +The \gls{VO} is a general term for a collection of federated resources that can be used to conduct astronomical research, education, and outreach. The \href{https://www.ivoa.net}{International Virtual Observatory Alliance (IVOA)} is a global collaboration of separately funded projects to develop standards and infrastructure that enable VO applications. 
\section{Introduction} -The \gls{IVOA} \gls{HEIG} was formed in the Fall of 2024, and developed an \gls{IVOA} Note \citep{2024ivoa.note.heig} that explores the connections between the \gls{VO} and \gls{HEA}. Here, the \gls{HEA} covers experiments and observatories from the X-ray range up to the PeV range, as well as the astrophysical neutrinos above the TeV range, called here the \gls{HE} domain. The HEIG Note includes an outline of several important topics that have formed a roadmap for the group. An ObsCore \citep{2017ivoa.spec.0509L} extension for \gls{HEA} data is the first priority in order to meet the needs of HEA, and to coincide with similar work being carried out by the Radio IG, Time Domain IG, and discussions on DM standards, such that current and future \gls{HEA} experiments and observatories are able to release data on the \gls{VO}. +The \gls{IVOA} \gls{HEIG} was formed in the Fall of 2024, and developed an \gls{IVOA} Note \citep{2024ivoa.note.heig} that explores the connections between the \gls{VO} and \gls{HEA}. Here, the \gls{HEA} covers experiments and observatories from the X-ray range up to the PeV range, as well as the astrophysical neutrinos above the TeV range, called here the \gls{HEA} domain. The HEIG Note includes an outline of several important topics that have formed a roadmap for the group. An ObsCore \citep{2017ivoa.spec.0509L} extension for \gls{HEA} data is the first priority in order to meet the needs of HEA, and to coincide with similar work being carried out by the Radio IG, Time Domain IG, and discussions on DM standards, such that current and future \gls{HEA} experiments and observatories are able to release data on the \gls{VO}. The goal is to explore elements needed to reliably discover and select \gls{HEA} data through \gls{IVOA} interfaces. It implies defining an extension to ObsCore with the possibility to use the DataLink mechanism and to enhance vocabularies of keywords for ObsCore and DataLink. 
We suggest that, if an attribute is unique to \gls{HEA} data, that element should appear in an \gls{HEA} ObsCore extension. Whereas, if an attribute makes sense for more than one domain and can be shared across those domains, then that element should be added to the base ObsCore model. This note proposes recommendations in both of these categories. We also discuss enhancements to the vocabulary of data products, DataLink semantics, UCDs and MIME-types to correctly represent \gls{HEA} data. Topics related to the Registry are currently outlined in the proposed Radio extension document and are not discussed here. \section{High Energy Astrophysics Data} -\gls{HEA} data include observations obtained using photon detectors covering X-ray (from $\sim$0.1 eV to $\sim$100 keV) through gamma-ray (from 100 MeV up to $\gtrsim$ PeV) energies, as well as cosmic-ray and astrophysical neutrino ($\gtrsim$ TeV) detectors. The domain is now sufficiently mature to provide open data that are science-ready and work with open analysis tools ({\em e.g.\/} CIAO \citep{2006SPIE.6270E..1VF} or Gammapy \citep{gammapy:2023}). The science output of the \gls{HE} domain already includes high-level products such as images, cubes, spectra, and time series such as light curves and time-resolved spectra. Additional data products include fitted sky models with a spatial, spectral and/or temporal component(s), along with their confidence intervals or confidence limits, and covariance matrices. Finally, multiple \gls{HE} instruments produce source catalogs and surveys covering up to the full the sky, which include maps of photon or particle flux, exposure, sensitivity, and aperture-photometry likelihood profiles. 
+\gls{HEA} data include observations obtained using photon detectors covering X-ray (from $\sim$0.1 keV to $\sim$100 keV) through gamma-ray (from 100 MeV up to $\gtrsim$ PeV) energies, as well as cosmic-ray and astrophysical neutrino ($\gtrsim$ TeV) detectors, or other messengers related to \gls{HEA} phenomena. The domain is now sufficiently mature to provide open data that are science-ready and work with open analysis tools ({\em e.g.\/} CIAO \citep{2006SPIE.6270E..1VF} or Gammapy \citep{gammapy:2023}). The science output of the \gls{HEA} domain already includes high-level products such as images, cubes, spectra, and time series such as light curves and time-resolved spectra. Additional data products include fitted sky models with a spatial, spectral and/or temporal component(s), along with their confidence intervals or confidence limits, and covariance matrices. Finally, multiple \gls{HEA} instruments produce source catalogs and surveys covering up to the full sky, which include maps of photon or particle flux, exposure, sensitivity, and aperture-photometry likelihood profiles. -Observations of the Universe at the highest energies are based on techniques that are radically different compared to the UV through radio domains. \gls{HE} observatories\footnote{One can cite Chandra, XMM-Newton, Fermi, H.E.S.S., MAGIC, VERITAS, HAWC, LHAASO, IceCube, Auger and soon CTAO and KM3NeT, SWGO.} are generally designed to detect particles ({\em e.g.\/}, individual photons, cosmic-rays or neutrinos) with the ability to estimate multiple observables for those particles. These detection techniques rely all on \emph{event counting}\footnote{By opposition of aperture photometry when integrating signal into a light detector.}, where an event has some probability of being due to the interaction of an astronomical particle with the detectors, but also some probability from being from instrumental or background effects. 
The data corresponding to an event are first an instrumental signal, which is then calibrated and processed to estimate physical quantities such as a time of arrival, point-of-origin on the sky, and an energy proxy associated with the event. Several other intermediate and qualifying characteristics may be associated with a detected event, depending on the detection technique. The ensemble of events detected over a given time interval and spatial field-of-view is referred to as an \emph{event list}. +Observations of the Universe at the highest energies are based on techniques that are radically different compared to the UV through radio domains. \gls{HEA} observatories\footnote{One can cite Chandra, XMM-Newton, Fermi, H.E.S.S., MAGIC, VERITAS, HAWC, LHAASO, IceCube, Auger and soon CTAO and KM3NeT, SWGO.} are generally designed to detect particles ({\em e.g.\/}, individual photons, cosmic-rays or neutrinos) with the ability to estimate multiple observables for those particles. These detection techniques all rely on \emph{event counting}\footnote{As opposed to integrating the signal using a light detector.}, where an event has some probability of being due to the interaction of an astronomical particle with the detectors, but also some probability of being due to instrumental or background effects. The data corresponding to an event are first an instrumental signal, which is then calibrated and processed to estimate physical quantities such as a time of arrival, point-of-origin on the sky, and an energy proxy associated with the event. Several other intermediate and qualifying characteristics may be associated with a detected event, depending on the detection technique. The ensemble of events detected over a given time interval and spatial field-of-view is referred to as an \emph{event list}. 
-Though \textbf{event-lists} \emph{may} include estimators for calibrated physical values, they typically still have to be corrected for the photometric, spectral, spatial, and/or temporal responses of the telescope and detector combination to yield scientifically interpretable information. The mappings between the observables and physical measurements of the source properties are called \glspl{IRF}; using techniques like forward-folding, they enable one to fit a model (with any combination of spectral, spatial, and temporal components) of the true flux of particles from a source arriving at the instrument to the measured quantities. The \glspl{IRF} generally evolve over time with the instrument and observation characteristics, and are usually defined for a specific time interval and decomposed into a standard set of independent components (see section 3.1.5 of \citep{2024ivoa.note.heig}), such as the spatial point-spread function or the energy-migration matrix, where each component may be stored or computed separately. Some of the \glspl{IRF} are probabilistic in nature\footnote{The energy matrix is a probability density funtion}, and in addition may depend on the set of events selected for analysis by the end user. They are usually not invertible, so methods such as forward-folding fitting (using source models that are estimated) are needed to estimate physical properties given observables. Since both \glspl{IRF} and \textbf{event-lists} are required to process \gls{HEA} data, some \gls{IVOA} standards must be be modified in order to expose both of them via the \gls{VO}. +Though \textbf{event-lists} \emph{may} include estimators for calibrated physical values, they typically still have to be corrected for the photometric, spectral, spatial, and/or temporal responses of the telescope and detector combination to yield scientifically interpretable information. 
The mappings between the observables and physical measurements of the source properties are called \glspl{IRF}\footnote{We try to avoid using the term \gls{IRF} in a normative sense since historical usage across the broad \gls{HEA} community (and from facility to facility) varies. In some cases, \gls{IRF} has been used to mean specifically the X-ray product of the ``ARF'' and ``RMF'', whereas in other cases \gls{IRF} +has been used more generally to mean any instrumental response function regardless of type.}; using techniques like forward-folding, they enable one to fit a model (with any combination of spectral, spatial, and temporal components) of the true flux of particles from a source arriving at the instrument to the measured quantities. The \glspl{IRF} generally evolve over time with the instrument and observation characteristics, and are usually defined for a specific time interval and decomposed into a standard set of independent components (see section 3.1.5 of \citep{2024ivoa.note.heig}), such as the spatial point-spread function or the energy-migration matrix, where each component may be stored or computed separately. Some of the \glspl{IRF} are probabilistic in nature\footnote{The energy matrix is a probability density function.}, and in addition may depend on the set of events selected for analysis by the end user. They are usually not invertible, so methods such as forward-folding fitting (using source models that are estimated) are needed to estimate physical properties given observables. Since both \glspl{IRF} and \textbf{event-lists} are required to process \gls{HEA} data, some \gls{IVOA} standards must be modified in order to expose both of them via the \gls{VO}. In the following, the current ObsCore standard will be discussed in Section \ref{sec:obscore}, focusing on attributes that need to be modified. 
Then, we propose the creation of a \gls{HEA} extension of ObsCore in Section \ref{sec:obscoreext}, as some attributes are very specific to our domain (see \S\ref{sec:ibscoreext}). In these two sections, the discussion focuses on the attribute definitions rather on the attribute values. In Section \ref{sec:voc}, enhancement of vocabulary is proposed for some ObsCore attributes, DataLink semantics, UCDs and MIME-types. @@ -122,7 +123,7 @@ \section{ObsCore Attribute Definitions for High Energy Astrophysics Data} The ObsCore representation of any \gls{HEA} \textbf{event-list} data products is described in terms of curation, coverage, and access. However, given the \gls{HEA} data specificities, several properties, including resolutions, observable axis descriptions, and polarization states would be simply set to ``NULL'', and data axis lengths are set to ``$-1$''. Therefore, for these data products and associated \glspl{IRF}, the definitions of some ObsCore attributes should be adjusted so that they better represent the content of the data from the perspective of data discovery. We note that many properties, including spatial and spectral coverage and resolution can vary strongly with energy and off-axis angle. These adjustments will also typically apply to advanced, high-level data products derived from \textbf{event-list} data. -In addition, the hereafter modification proposal faces to the issue that some values of ObsCore attributes ({\em dataproduct\_type} and {\em calib\_level}) are defined both into the Obscore standard document \citep{2017ivoa.spec.0509L} and in the vocabularies documents \citep{2023ivoa.spec.0206D, 2021ivoa.spec.0525D}, which might create some issues for the users. In this context, we have opted to propose in this document some modifications of both standards, even if we would have prefered that everything is uniquely defined in the \gls{IVOA} Vocabulary. 
Some harmonization should be taken by the Data Model and Semantics working groups in order to avoid duplications. But until such work is achieved, we require modifications in ObsCore and Vocabulary. +In addition, the modifications proposed below face the issue that some values of ObsCore attributes ({\em dataproduct\_type} and {\em calib\_level}) are defined both in the ObsCore standard document \citep{2017ivoa.spec.0509L} and in the vocabulary documents \citep{2023ivoa.spec.0206D, 2021ivoa.spec.0525D}, which might create some issues for the users. In this context, we have opted to propose in this document some modifications of both standards, even if we would have preferred that everything be uniquely defined in the \gls{IVOA} Vocabulary. Some harmonization should be undertaken by the Data Model and Semantics working groups in order to avoid duplications. But until such work is achieved, we require modifications in ObsCore and Vocabulary. \subsection{{\em dataproduct\_type}} \label{sec:dataproduct_type} @@ -135,20 +136,20 @@ \subsection{{\em dataproduct\_type}} {\bf event}: an event-counting ({\em e.g.\/}, X-ray or other high energy) dataset of some sort. Typically this is instrumental data, {\em i.e.\/}, ``event data''. An event dataset is often a complex object containing multiple files or other substructures. An event dataset may contain data with spatial, spectral, and time information for each measured event, although the spectral resolution (energy) is sometimes limited. Event data may be used to produce higher level data products such as images or spectra. 
\end{quote} -We propose to add the following {\em dataproduct\_type} term in both the Obscore standard and into the \gls{IVOA} vocabulary is of Product Types\footnote{See \url{https://www.ivoa.net/rdf/product-type}.} to better define a \gls{HEA} \textbf{event-list} and a \textbf{event-list} that includes the event-list and its associated data: +We propose to add the following {\em dataproduct\_type} terms to both the ObsCore standard and the \gls{IVOA} Vocabulary of Data Product Types\footnote{See \url{https://www.ivoa.net/rdf/product-type}.} to better define an \gls{HEA} \textbf{event-list} and an \textbf{event-bundle} that includes the event-list and its associated data: \begin{quote} -{\bf event-list}: a collection of observed particle-detection events, such as incoming high-energy particles. The table of event list is typically characterised by a spatial position, a time and an energy proxy. +{\bf event-list}: a collection of observed particle-detection events, such as incoming high-energy particles. Each event in the list is typically characterised by a spatial position, a time and an energy proxy. -{\bf event-bundle}: compounded dataset containing an {\bf event-list} and multiple files or other substructures that are products necessary to analyze the event-list. Data in an event-bundle may thus be used to produce higher level data products such as images or spectra when containing \glspl{IRF}. +{\bf event-bundle}: compounded dataset containing an {\bf event-list} and multiple files or other substructures that are products necessary to analyze the event-list. Data in an event-bundle may thus be used to produce higher level data products calibrated in physical units when containing \glspl{IRF} or other data products that can be used to construct \glspl{IRF}. 
\end{quote} It may be worth mentioning that the term ``event'' caused confusion in the past, as it also is used for astrophysical events like supernova explosions ({\em e.g.\/} VOEvent), and that is not the type of event that is being described here, which are particle detection events. Using "event-list" was meant to help to resolve this ambiguity. -An {\bf event-bundle} might for example consist of an {\bf event-list} and the associated {\bf response-functions} (see below) used to calibrate the dataset; alternatively an {\bf event-bundle} may include the {\bf event-list} and associated data products necessary for the user to create the {\bf response-functions} (for those X-ray cases where detailed knowledge of the scientific use case — for example, the user’s selection of events — may be required to compute the responses).\\ -particle-detection +%An {\bf event-bundle} might for example consist of an {\bf event-list} and the associated {\bf response-functions} (see below) used to calibrate the dataset; alternatively an {\bf event-bundle} may include the {\bf event-list} and associated data products necessary for the user to create the {\bf response-functions} (for those X-ray cases where detailed knowledge of the scientific use case — for example, the user’s selection of events — may be required to compute the responses).\\ +%particle-detection -In addition to {\em dataproduct\_type} terms that focus on event data, we note that existing ObsCore definitions do not adequately span the breadth of advanced data products (with {\em calib\_level} $\ge$ 3) that may be generated from astronomical observations by users or observatories. The computational complexity of analyzing \gls{HEA} data robustly in the extreme Poisson regime ({\em e.g.\/}, Bayesian X-ray aperture photometry applied simultaneously to multiple overlapping detections and observations) means that data providers may choose to provide such analysis products directly to the end user. 
For example, the Chandra Source Catalog includes 38 types of advanced data products (for a total of $\sim$90 million files) and $\sim$50\% of these data product types are not well represented by a {\em dataproduct\_type} value that allows for meaningful data discovery. Users will certainly want to discover these data products independently from the associated observation data (and many of these data products combine data from multiple observations). We therefore propose the following additional {\em dataproduct\_type} (or {\em dataproduct\_subtype}) terms for these advanced data products, and note that these terms will certainly be useful independent of waveband (i.e., they can be equally applicable to UV/optical, IR, and radio datasets): +In addition to {\em dataproduct\_type} terms that focus on event data, we note that existing ObsCore definitions do not adequately span the breadth of advanced data products (with {\em calib\_level} $\ge$ 3) that may be generated from astronomical observations by users or observatories. The computational complexity of analyzing \gls{HEA} data robustly in the extreme Poisson regime ({\em e.g.\/}, Bayesian X-ray aperture photometry applied simultaneously to multiple overlapping detections and observations) means that data providers may choose to provide such analysis products directly to the end user. For example, the Chandra Source Catalog includes 38 types of advanced data products (for a total of $\sim$90 million files) and $\sim$50\% of these data product types are not well represented by a {\em dataproduct\_type} value that allows for meaningful data discovery. Users will certainly want to discover these data products independently from the associated observation data (and many of these data products combine data from multiple observations). 
We therefore propose the following additional {\em dataproduct\_type} (or {\em dataproduct\_subtype}) terms for these advanced data products, and note that these terms will certainly be useful independent of waveband ({\em i.e.\/}, they can be equally applicable to UV/optical, IR, and radio datasets): \begin{quote} {\bf draws}: a dataset that represents draws computed from a probability distribution, for example the Markov chain Monte Carlo (MCMC) draws used when computing the Bayesian marginal probability density function for a random variable. The draws @@ -158,12 +159,11 @@ \subsection{{\em dataproduct\_type}} {\bf region}: a dataset that includes an encoding of (one or more) regions of parameter space, for example a spatial region or a region of phase space covered by a dataset. The set of dimensions represented by the region can be arbitrary. -{\bf response-function}: a dataset that represents a mapping from a physical quantity to an observable. For \gls{HEA}, this may be the components of the composite \gls{IRF}\footnote{We try to avoid using the term \gls{IRF} in a normative sense since historical usage across the broad \gls{HEA} community (and from facility to facility) varies. In some cases, \gls{IRF} has been used to mean specifically the X-ray product of the ``ARF'' and ``RMF'', whereas in other cases \gls{IRF} -has been used more generally to mean any instrumental response function regardless of type.} such as an Auxiliary Response File ({\bf ARF}), Redistribution Matrix File ({\bf RMF}), Effective Area ({\bf AEFF}), Energy Dispersion ({\bf EDISP}), the Background Rate ({\bf BKGRATE}). The Point Spread Function ({\bf PSF}) is a response function that is generally applicable across multiple wavebands. While these datasets may generally be represented as an N-dimensional data cube, designating them as {\bf response-functions} enhances data discovery for very common types of \gls{HEA} dataset (see the use cases in appendix \ref{sec:uc}). 
+{\bf response-function}: a dataset that represents a mapping from a physical quantity to an observable. For \gls{HEA}, this may be the components of the composite \gls{IRF} such as an Auxiliary Response File ({\bf ARF}), Redistribution Matrix File ({\bf RMF}), Effective Area ({\bf AEFF}), Energy Dispersion ({\bf EDISP}), or the Background Rate ({\bf BKGRATE}). The Point Spread Function ({\bf PSF}) is a response function that is generally applicable across multiple wavebands. While these datasets may generally be represented as an N-dimensional data cube, designating them as {\bf response-functions} enhances data discovery for very common types of \gls{HEA} dataset (see the use cases in appendix \ref{sec:uc}). \end{quote} -The {\bf measurements} data product type is quite useful for many different types of advanced data products (that may be derived from multiple observations) but users of those products often may not be interested the progenitor datasets, especially if many advanced data products are extracted from a single or a few progenitors ({\em e.g.\/}, {\bf measurements associated with sources detected in a single observation field}). We propose to delete the caveat associated with {\bf dataproduct\_type} = ``measurements'' in the ObsCore IVOA Recommendation (\S4.1.1) that requires the derived data products be exposed ``together with the progenitor observation dataset''.\\ +The {\bf measurements} data product type is quite useful for many different types of advanced data products (that may be derived from multiple observations) but users of those products often may not be interested in the progenitor datasets, especially if many advanced data products are extracted from a single or a few progenitors ({\em e.g.\/}, measurements associated with sources detected in a single observation field). 
We propose to delete the caveat associated with {\bf dataproduct\_type} = ``measurements'' in the ObsCore IVOA Recommendation (\S4.1.1) that requires the derived data products be exposed ``together with the progenitor observation dataset''. Note that these terms will be repeated in the section \ref{sec:voc}, as mentioned in the introduction of this sub-section. @@ -177,7 +177,7 @@ \subsection{{\em calib\_level}} ObsCore defines calibration {\bf Level 1} as ``Instrumental data in a standard format (FITS, VOTable, SDFITS, ASDM, etc.) which could be manipulated with standard astronomical packages.'' and {\bf Level 2} as ``Calibrated, science ready data with the instrument signature removed.'' -However, some \gls{HEA} {\bf event-list}s include spatial and time axes that are calibrated physical quantities, but the spectral axis is instrumental and requires application of the IRFs to remove this signature. In X-ray, this is typically done because the {\bf response-funtion}s can depend on the choice of region (spatial/time) from which the events are extracted (especially for telescope/detector combinations where the telescope position dithers on the sky during the exposure), which depends on the specific science case and therefore cannot be determined {\em a priori\/}. Such {\bf event-list}s fall ``between'' {\em calib\_level\/} 1 and 2. +However, some \gls{HEA} {\bf event-list}s include spatial and time axes that are calibrated physical quantities, but the spectral axis is instrumental and requires application of the IRFs to remove this signature. In X-ray, this is typically done because the {\bf response-function}s can depend on the choice of region (spatial/time) from which the events are extracted (especially for telescope/detector combinations where the telescope position dithers on the sky during the exposure), which depends on the specific science case and therefore cannot be determined {\em a priori\/}. 
Such {\bf event-list}s fall ``between'' {\em calib\_level\/} 1 and 2. On the other hand, other {\bf event-list}s may not have any calibrated axes or may have all axes calibrated, and it is important to be able to differentiate between these for data discovery. While the value for {\em calib\_level\/} for any data product is left for the data provider to determine, we suggest that individual data providers set {\em calib\_level\/} = 1 if an {\bf event-list} is considered to be ``uncalibrated'' according to normal usage for their data products and set {\em calib\_level\/} = 2 if an {\bf event-list} is considered to be ``calibrated'' according to normal usage for their data products. @@ -185,7 +185,7 @@ \subsection{{\em calib\_level}} \subsection{{\em access\_url}} -Given the complexity and number of HE data products, the {\em access\_url} may point either directly to a file ({\em e.g.\/} to the {\bf event-list} or an {\bf event-bundle}), or to a DataLink service that will provide links to the data and to associated data ({\em e.g.\/} response functions). +Given the complexity and number of \gls{HEA} data products, the {\em access\_url} may point either directly to a file ({\em e.g.\/} to the {\bf event-list} or an {\bf event-bundle}), or to a DataLink service that will provide links to the data and to associated data ({\em e.g.\/} {\bf response-function}s). If DataLink is provided, it should be indicated that the URL points to a Datalink service via the {\em access\_format} = application/x-votable+xml;content\\=datalink. @@ -229,9 +229,9 @@ \subsection{{\em o\_ucd}} For an {\bf event-list}, we can consider that all measures stored in column values are observables. This is {\em the\/} fundamental difference between \gls{HEA} {\bf event-list}s and typical pixelated datasets. The current ObsCore Recommendation suggests that {\em o\_ucd\/} be set to ``NULL'' for event lists. However this significantly hampers data discovery for \gls{HEA} datasets. 
Since the data content of {\bf event-list}s may vary significantly from facility to facility, meaningful discovery of \gls{HEA} datasets {\em requires\/} the user be able to query the UCDs of the set of observables included in an {\bf event-list}. -A natural way of doing this that is consistent with current usage would be to extend {\em o\_ucd\/} to allow specification of {\em multiple\/} observables for {\bf event-list}s (and {\bf event-bundle}s, for example, {\em o\_ucd\/} = {\em pos.eq,time,phys.pulseHeight\/}. +A natural way of doing this that is consistent with current usage would be to extend {\em o\_ucd\/} to allow specification of {\em multiple\/} observables for {\bf event-list}s (and {\bf event-bundle}s), for example, {\em o\_ucd\/} = {\em pos.eq,time,phys.pulseHeight\/}. -Note that real {\bf event-list}s may include an extensive set of columns ({\em e.g.\/}, a {\em Chandra\/} ACIS Level~1 {\bf event-list} includes $\sim\!20$ columns, depending on observing mode) and several columns may represent similar (but not identical) observables ({\em e.g.\/}, event position in detector pixel coordinates, projected onto the focal surface, corrected for geometric distortions, corrected for spacecraft dither motion, mapped to world coordinates). Currently defined UCDs are not sufficiently fine-grained to be able to differentiate between these various cases. But that is likely not be necessary, since for data discovery purposes the user is typically interested in the ``most calibrated'' properties in each of the spatial/spectral/time(/polarization) axes ({\em e.g.\/}, world coordinates in the above example). 
+Note that real {\bf event-list}s may include an extensive set of columns ({\em e.g.\/}, a {\em Chandra\/} ACIS Level~1 {\bf event-list} includes $\sim\!20$ columns, depending on observing mode) and several columns may represent similar (but not identical) observables ({\em e.g.\/}, event position in detector pixel coordinates, projected onto the focal surface, corrected for geometric distortions, corrected for spacecraft dither motion, mapped to world coordinates). Currently defined UCDs are not sufficiently fine-grained to be able to differentiate between these various cases. But that is likely not necessary, since for data discovery purposes the user is typically interested in the ``most calibrated'' properties in each of the spatial/spectral/time(/polarization) axes ({\em e.g.\/}, world coordinates in the above example). In the example {\em o\_ucd\/} above, the example UCD {\em phys.pulseHeight\/} is used to represent the detector Pulse Height Amplitude (PHA). There is currently no UCD defined for a raw measure like PHA, but we propose the addition of {\em phys.pulseHeight\/} to the UCDList vocabulary, together with other UCDs that are relevant for \gls{HEA} data, in Section~\ref{sec:UCDs}. @@ -254,7 +254,7 @@ \subsection{{\em ev\_xel}} \subsection{{\em s\_ref\_energy\/}/{\em em\_ref\_energy\/}/{\em s\_ref\_oaa\/}/{\em em\_ref\_oaa}} -For \gls{HEA} datasets that typically span decades of energy, both spatial resolution and sky coverage, and spectral resolution, can be strongly dependent on particle energy. The ObsCore Recommendation suggests that in such circumstances a {\em characteristic\/} value be specified for the spatial and spectral characterization attributes ({\em e.g.\/}, {\em s\_fov\/}, {\em s\_region\/}, {\em s\_resolution\/}, {\em em\_res\_power\/}, {\em em\_resolution\/}). 
We propose adding optional attributes ({\em s\_ref\_energy\/} for spatial characterization attributes and {\em em\_ref\_energy\/} for spectral characterization attributes) that define the energy (in units of eV) at which these characteristic values are specified. +For \gls{HEA} datasets that typically span decades of energy, spatial resolution, sky coverage, and spectral resolution can all be strongly dependent on particle energy. The ObsCore Recommendation suggests that in such circumstances a {\em characteristic\/} value be specified for the spatial and spectral characterization attributes ({\em e.g.\/}, {\em s\_fov\/}, {\em s\_region\/}, {\em s\_resolution\/}, {\em em\_res\_power\/}, {\em em\_resolution\/}). We propose adding optional attributes ({\em s\_ref\_energy\/} for spatial characterization attributes and {\em em\_ref\_energy\/} for spectral characterization attributes) that define the energy (in units of eV) at which these characteristic values are specified. For some \gls{HEA} datasets, these attributes vary strongly with position in the field of view, typically as a function of off-axis angle ({\em i.e.\/}, the angular separation of the target or source from the telescope optical axis). We similarly propose adding optional attributes ({\em s\_ref\_oaa\/} for spatial characterization attributes and {\em em\_ref\_oaa\/} for spectral characterization attributes) that define the off-axis angle (in units of degrees) at which these characteristic values are specified. @@ -262,6 +262,8 @@ \subsection{{\em t\_intervals}} The global time bounds described by {\em t\_min\/}/{\em t\_max} in general are not sufficiently flexible when representing \gls{HEA} datasets and advanced data products from any waveband. 
The former are typically composed of many \glspl{STI}/\glspl{GTI}, where data are only valid during the stable or good intervals, while advanced data products may be constructed from multiple progenitor observations that can span decades from the start time of the first observations to the stop time of the last observation (albeit very sparsely). For both cases, data queries using only {\em t\_min\/}/{\em t\_max} may not be adequate to determine whether useful scientific data coincide with a transient cosmic phenomenon. In such cases, a more detailed knowledge of the observation time coverage is necessary. We propose to add a new optional attribute {\em t\_intervals} that would contain the list of observation intervals or STIs/GTIs as a TMOC description following the \gls{MOC} IVOA standard \citep{2022ivoa.spec.0727F}. This element could then be compared across data collections to make the data set selection via simple intersection or union operations in TMOC representation. +We recognize that performing such queries will require enhancements to ADQL, but this capability is sufficiently important for some \gls{HEA} data discovery scenarios that we have chosen to add {\em t\_intervals\/}, in anticipation that ADQL will eventually provide this functionality. + \subsection{{\em energy\_min\/}/{\em energy\_max\/}} The existing attributes {\em em\_min\/} and {\em em\_max\/} that define the coverage of the spectral axis (defined as wavelength expressed in units of m) are not user friendly for \gls{HEA} where datasets are generally selected according to an energy range ({\em i.e.\/}, inverse wavelength) in units of eV (or scaled units of eV, for example keV, MeV, GeV, TeV, PeV). Unlike the radio domain where $\lambda = c/\nu$, where $c$ is an almost universally remembered physical constant, the conversion $\lambda = hc/E$ is not simple for the user to express. 
As the spectral range covered by \gls{HEA} data is many decades larger than for other wavebands, the accurate numerical representations of typical \gls{HEA} spectral ranges as {\em em\_min\/}/{\em em\_max\/} requires quantities with many digits of precision and exponents ranging from $\sim\!10^{-5}$--$10^{-22}$. Since specification of the spectral range is largely fundamental to data discovery in the \gls{HEA} regime, we propose to add attributes {\em energy\_min\/} and {\em energy\_max\/} that specify the minimum and maximum spectral range values in units of eV\null. Note that the sense of these attributes is {\em opposite\/} that of {\em em\_min\/} and {\em em\_max\/} because of the inverse wavelength relationship between energy and wavelength, so numerical comparisons must be transposed ({\em e.g.\/}, $E>E_{\rm thresh}$ becomes $\lambda= 1,000,000 \end{enumerate} \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE (target_name = 'Cas A' OR -CONTAINS(POINT(s_ra, s_dec), CIRCLE, 350.8584, +58.8113, 0.16667) = 1) +CONTAINS(POINT(s_ra, s_dec), CIRCLE, 350.8584, +58.8113, 0.16667) = 1) AND (dataproduct_type = 'event-bundle') AND (ev_xel >= 1000000) \end{verbatim} @@ -53,15 +54,15 @@ \subsubsection{Use Case --- Search for event bundles via DataLink that include C \item Target name = ``Cas A'' or position inside 4 deg from (350.8584, $+58.8113$) \item dataproduct\_type = ``event-bundle'' \item obs\_collection = ``VERITAS-DR1'' - (iii) access\_format =``datalink'' + \item access\_format =``datalink'' \end{enumerate} \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE (target_name = 'Cas A' OR -CONTAINS(POINT(s_ra, s_dec), CIRCLE, 350.8584, +58.8113, 4.0) = 1) +CONTAINS(POINT(s_ra, s_dec), CIRCLE, 350.8584, +58.8113, 4.0) = 1) AND (dataproduct_type = 'event-bundle') AND (obs_collection = 'VERITAS-DR1') AND (access_format = 
’application/x-votable+xml;content=datalink’) @@ -71,7 +72,7 @@ \subsubsection{Use Case --- Search for event bundles via DataLink that include C \subsubsection{Use Case --- Identify PSF response-functions for further analysis of previously downloaded data products} -{\em Identify all Chandra Source Catalog point spread functions for source detections that fall within 2 arcmin radius of (83.84358, $-5.43639$) in the Orion star-forming complex for Chandra observation 4374. These PSFs will be used to analyze previously downloadedcatalog data products for the same field.\/ } +{\em Identify all Chandra Source Catalog point spread functions for source detections that fall within 2 arcmin radius of (83.84358, $-5.43639$) in the Orion star-forming complex for Chandra observation 4374. These PSFs will be used to analyze previously downloaded catalog data products for the same field.\/ } \medskip \noindent Find all datasets satisfying: @@ -85,9 +86,9 @@ \subsubsection{Use Case --- Identify PSF response-functions for further analysis \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE -(CONTAINS(POINT(s_ra, s_dec), CIRCLE, 83.84358, -5.43639, 0.033333) = 1) +(CONTAINS(POINT(s_ra, s_dec), CIRCLE, 83.84358, -5.43639, 0.033333) = 1) AND (dataproduct_type = 'response-function') AND (dataproduct_subtype = 'psf') AND (obs_id = '4374') @@ -100,7 +101,7 @@ \subsubsection{Use Case --- Get all the \glspl{IRF} for a given CTAO observation {\em Simulations are frequently used to estimate the science performance for a given astrophysical use case. To realise such simulations, \glspl{IRF} are required. 
\/ } \medskip -\noindent Find the CTAO dataset satisfying: +\noindent Find the CTAO datasets satisfying: \begin{enumerate}[(i)] \item dataproduct\_type = ``aeff'' or dataproduct\_type = ``psf'' or dataproduct\_type = ``edisp'' or dataproduct\_type = ``bkgrate'' \item obs\_id = ``4374'' @@ -109,9 +110,10 @@ \subsubsection{Use Case --- Get all the \glspl{IRF} for a given CTAO observation \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE -(dataproduct_type = 'event-list' OR dataproduct_type = 'aeff' OR dataproduct_type = 'edisp' +(dataproduct_type = 'event-list' OR dataproduct_type = 'aeff' + OR dataproduct_type = 'edisp' OR dataproduct_type = 'psf' OR dataproduct_type = 'bkgrate') AND (obs_id = '4374') AND (obs_collection = 'CTAO-DR1') @@ -134,10 +136,10 @@ \subsubsection{Use Case --- Search for event lists that include a fully calibrat \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE (target_name = 'BL Lac' OR -CONTAINS(POINT(s_ra, s_dec), CIRCLE, 330.680338, +42.27777, 0.083333) = 1) +CONTAINS(POINT(s_ra, s_dec), CIRCLE, 330.680338, +42.27777, 0.083333) = 1) AND (dataproduct_type = 'event-list') AND (calib_level >= 2) AND (em_calib_status = 'calibrated') @@ -159,10 +161,10 @@ \subsubsection{Use Case --- Search for spatially resolved spectropolarimetric ob \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE (target_name = 'Crab' OR target_name = 'M1' OR -CONTAINS(POINT(s_ra, s_dec), CIRCLE, 83.6324, +22.0174, 0.083333) = 1) +CONTAINS(POINT(s_ra, s_dec), CIRCLE, 83.6324, +22.0174, 0.083333) = 1) AND (dataproduct_type = 'event-bundle') AND (calib_level >= 2) AND (s_resolution > 100) @@ -187,9 +189,9 @@ \subsubsection{Use Case --- Search for SWGO event lists and their \glspl{IRF} f First, run the ObCore query: \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he 
+NATURAL JOIN ivoa.obscore_hea WHERE (INTERSECTS(s_region, CIRCLE(312.775, 30.683, 1.5)) = 1) -AND (dataproduct_type = 'event-list' OR dataproduct_type = 'aeff' OR dataproduct_type = 'edisp' +AND (dataproduct_type = 'event-list' OR dataproduct_type = 'aeff' OR dataproduct_type = 'edisp' OR dataproduct_type = 'psf' OR dataproduct_type = 'bkgrate') AND (obs_collection = 'SWGO-DR1') AND (event_type = 'very-good') @@ -214,14 +216,14 @@ \subsubsection{Use Case --- Search for event lists and their \glspl{IRF} of CTAO First, run the ObCore query: \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he -(dataproduct_type = 'event-list') +NATURAL JOIN ivoa.obscore_hea +WHERE (dataproduct_type = 'event-list') AND (obs_collection = 'CTAO-DR1') AND (access_format = 'application/x-votable+xml;content=datalink') AND (instrument_name LIKE 'CTAO-S') AND (ENERGYMAX GE 10e+12) \end{verbatim} -The query output is a VOTable that follows the DALI specification. +The query output is a VOTable that follows the DALI specification. \medskip \noindent Then, process this VOTABLE to access to the data: @@ -234,21 +236,22 @@ \subsubsection{Use Case --- Search for event lists and their \glspl{IRF} of CTAO \begin{verbatim} FOR EACH ROW OF OUTPUT_VOTABLE: - OBS_ID = OUTPUT_VOTABLE['ID'] # To Be Checked - Mathieu, Mireille? - DATALINK_TABLE = GET OUTPUT_VOTABLE['access_url'] # To Be Checked - Mathieu, Mireille? + OBS_ID = OUTPUT_VOTABLE['ID'] + DATALINK_TABLE = GET OUTPUT_VOTABLE['access_url'] FOR EACH ROW OF DATALINK_TABLE: - IF ROW['content_qualifier'] = 'aeff' - AEFF_FILE['OBS_ID'] = GET RAW['accessURL'] # To Be Checked - Mathieu, Mireille? + AEFF_FILE['OBS_ID'] = GET ROW['accessURL'] - IF ROW['content_qualifier'] = 'edisp' - EDISP_FILE['OBS_ID'] = GET RAW['accessURL'] # To Be Checked - Mathieu, Mireille? + EDISP_FILE['OBS_ID'] = GET ROW['accessURL'] - IF ROW['content_qualifier'] = 'psf' - PSF_FILE['OBS_ID'] = GET RAW['accessURL'] # To Be Checked - Mathieu, Mireille? 
+ PSF_FILE['OBS_ID'] = GET ROW['accessURL'] - IF ROW['content_qualifier'] = 'bkgrate' - BKGRATE_FILE['OBS_ID'] = GET RAW['accessURL'] # To Be Checked - Mathieu, Mireille? + BKGRATE_FILE['OBS_ID'] = GET ROW['accessURL'] - IF ROW['content_qualifier'] = 'event-list' - EVENT_FILE['OBS_ID'] = GET RAW['accessURL'] # To Be Checked - Mathieu, Mireille? + EVENT_FILE['OBS_ID'] = GET ROW['accessURL'] \end{verbatim} +\todo[inline]{To Be Checked - Mathieu, Mireille?} \subsection{Very-High-Level Data Products} @@ -268,9 +271,9 @@ \subsubsection{Use Case --- Search for Chandra Source Catalog position error MCM \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE -(CONTAINS(POINT(s_ra, s_dec), CIRCLE, 54.036061, +61.907633, 0.0013888) = 1) +(CONTAINS(POINT(s_ra, s_dec), CIRCLE, 54.036061, +61.907633, 0.0013888) = 1) AND (dataproduct_type = 'draws') AND (dataproduct_subtype = 'poserr') AND (obs_collection = 'CSC2') @@ -289,19 +292,19 @@ \subsubsection{Use Case --- Search for flux maps for CTAO-North observations bet \item obs\_collection = ``CTAO-DR1'' \item int(obs\_id) >= 4374 \item int(obs\_id) <= 4379 - \item ``CTA-N'' in instrument\_name + \item ``CTAO-N'' in instrument\_name \end{enumerate} \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE (dataproduct_type = 'image') AND (dataproduct_subtype = 'flux-map') -AND (obs_collection = 'CTAO-HL-DR1') +AND (obs_collection = 'CTAO-DR1') AND (instrument_name LIKE 'CTAO-N') -AND (CAST(obs\_id AS INTEGER) > 4374) -AND (CAST(obs\_id AS INTEGER) <= 4379) +AND (CAST(obs_id AS INTEGER) >= 4374) +AND (CAST(obs_id AS INTEGER) <= 4379) \end{verbatim} @@ -316,15 +319,15 @@ \subsubsection{Use Case --- Search for M31 source light curves and aperture phot \item dataproduct\_type = ``timeseries'' {\em or\/} ``pdf'' \item calib\_level = 4 \item energy\_min $\leq 0.3$ {\em and\/} energy\_max $\geq 7.0$ - \item t\_intervals TMOC 
intersects MJD 56320--56325 TT + \item t\_intervals TMOC intersects\footnote{We note that this function does not yet exist in ADQL.} MJD 56320--56325 TT \end{enumerate} \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE -(CONTAINS(POINT(s_ra, s_dec), CIRCLE, 10.6847, +41.2688, 1.5) = 1) +(CONTAINS(POINT(s_ra, s_dec), CIRCLE, 10.6847, +41.2688, 1.5) = 1) AND ((dataproduct_type = 'timeseries') OR (dataproduct_type = 'pdf')) AND (calib_level = 4) AND (energy_min <= 300.0) AND (energy_max >= 7000.0) @@ -339,24 +342,22 @@ \subsubsection{Use Case --- Search for the CTAO flux light curves of PKS 2155-30 \medskip \noindent Find all datasets satisfying: \begin{enumerate}[(i)] - \item dataproduct\_type = ``image''. - \item dataproduct\_subtype = ``flux-map''. + \item dataproduct\_type = ``timeseries'' + \item dataproduct\_subtype = ``flux''. \item obs\_collection = ``CTAO-DR1''. - \item tmin >= 62502 ({\em e.g.\/} 2030-01-01) - \item tmax >= 62866 ({\em e.g.\/} 2030-12-31) + \item tmin >= 62502 ({\em e.g.\/} 2030-01-01) + \item tmax <= 62866 ({\em e.g.\/} 2030-12-31) \item target\_name = ``PKS 2155-304'' \end{enumerate} \begin{verbatim} SELECT * FROM ivoa.obscore -NATURAL JOIN ivoa.obscore_he +NATURAL JOIN ivoa.obscore_hea WHERE -(dataproduct_type = 'image') -AND (dataproduct_type = 'timeseries') +(dataproduct_type = 'timeseries') AND (dataproduct_subtype = 'flux') -AND (obs_collection = 'CTAO-HL-DR1') +AND (obs_collection = 'CTAO-DR1') AND (target_name = 'PKS 2155-304') AND (tmin >= 62502) AND (tmax <= 62866) \end{verbatim} -