diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 29c7837..90db3ce 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.10', '3.11', '3.12', '3.13'] + python-version: ['3.11', '3.12', '3.13'] steps: - name: Checkout repository diff --git a/README.md b/README.md index 703ea4d..4728a39 100644 --- a/README.md +++ b/README.md @@ -124,21 +124,8 @@ isort . && ruff format && ruff check && mypy nuclearmasses ## Known issues -- [#5](https://github.com/php1ic/nuclearmasses/issues/5) The half life from the NUBASE data is stored as the individual elements, a column with the value in seconds would be useful -```python ->>> df[(df['A'] == 14) & (df['Symbol'] == 'C')][['HalfLifeValue', 'HalfLifeUnit', 'HalfLifeError']] - HalfLifeValue HalfLifeUnit HalfLifeError -TableYear -1983 NaN NaN -1993 NaN NaN -1995 5.73 ky 0.04 -2003 5.70 ky 0.03 -2012 5.70 ky 0.03 -2016 5.70 ky 0.03 -2020 5.70 ky 0.03 -``` - [#6](https://github.com/php1ic/nuclearmasses/issues/6) The decay mode field from the NUBASE data is stored 'as-is' from the file. It looks like it can be split on the ';' character for isotopes where there is more than one mode. A dictionary of {decay mode: fraction} may be the best way to store all of this information. -- [#7](https://github.com/php1ic/nuclearmasses/issues/7) Information from anything other than the ground state of an isotope is ignored when parsing the NUABSE file. +- [#7](https://github.com/php1ic/nuclearmasses/issues/7) Information from anything other than the ground state of an isotope is ignored when parsing the NUBASE file. The selection of what is and what is not included appears random to me which is why I simply ignored for the moment. 
def unit_to_seconds(self, unit_str: str) -> float:
    """Return how many seconds make up one ``unit_str``.

    The result is the multiplicative factor that converts a value expressed
    in ``unit_str`` (e.g. ``"min"``, ``"kyr"``, ``"ms"``) into seconds.

    Parameters
    ----------
    unit_str:
        A unit string in a form astropy can parse. Missing values
        (``None``, ``NaN``, ``pd.NA``) are accepted.

    Returns
    -------
    float
        Seconds per ``unit_str``, or ``NaN`` if the unit is missing, cannot
        be parsed, or is not convertible to seconds.
    """
    # Missing units propagate as NaN so the derived column stays empty for that row
    if pd.isna(unit_str):
        return np.nan

    # Import the subpackage explicitly: a bare ``import astropy`` is not guaranteed
    # to bind ``astropy.units`` as an attribute of the package.
    import astropy.units as u  # type: ignore[import-untyped]

    # NOTE(review): astropy's definition of "yr" may differ slightly from the year
    # length used by the NUBASE evaluators - confirm if sub-percent accuracy matters.
    try:
        return float(u.Unit(unit_str).to(u.s))
    except (ValueError, TypeError):
        # Unparseable strings and non-time units raise ValueError (astropy's unit
        # errors derive from it); unsupported objects raise TypeError. Anything
        # else is a genuine bug and is deliberately allowed to surface.
        return np.nan
"DecayModes": "string", @@ -157,11 +165,25 @@ def read_file(self) -> pd.DataFrame: # Convert stable isotopes into ones with enormous lifetimes with zero error so we can cast mask = df["HalfLifeValue"] == "stbl" - df.loc[mask, ["HalfLifeValue", "HalfLifeUnit", "HalfLifeError"]] = (99.99, "Zy", 0.0) + df.loc[mask, ["HalfLifeValue", "HalfLifeUnit", "HalfLifeError"]] = (99.99, "Zyr", 0.0) df["HalfLifeValue"] = df["HalfLifeValue"].astype("string").str.replace(r"[<>?~]", "", regex=True) # We'll be lazy here and remove any characters in this column. Future us will parse this properly df["HalfLifeError"] = df["HalfLifeError"].astype("string").str.replace(r"[<>?~a-z]", "", regex=True) + + # Use the 3 half-life columns to create 2 new columns with units of seconds + df["HalfLifeUnit"] = df["HalfLifeUnit"].astype("string") + for pattern, replacement in self.unit_replacements.items(): + df["HalfLifeUnit"] = df["HalfLifeUnit"].str.replace(pattern, replacement, regex=True) + + # Ensure numeric values + for col in ["HalfLifeValue", "HalfLifeError"]: + df[col] = pd.to_numeric(df[col], errors="coerce") + # Pre-compute unit -> second conversions + unit_map = df["HalfLifeUnit"].map(self.unit_to_seconds) + + df["HalfLifeSeconds"] = df["HalfLifeValue"] * unit_map + df["HalfLifeErrorSeconds"] = df["HalfLifeError"] * unit_map except ValueError as e: print(f"Parsing error: {e}") diff --git a/pyproject.toml b/pyproject.toml index 8c6f774..bcf00c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ authors = [ description = "A python package to parse and store the various files published by AME and NUBASE" version = "0.0.1" readme = "README.md" -requires-python = ">=3.10" +requires-python = ">=3.11" classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Education", @@ -20,7 +20,6 @@ classifiers = [ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.10", 
"Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", @@ -30,6 +29,8 @@ classifiers = [ "Topic :: Scientific/Engineering :: Physics", ] dependencies = [ + "astropy", + "numpy", "pandas", "pytest", ] diff --git a/tests/test_nubase_parse.py b/tests/test_nubase_parse.py index 31bd10f..0fa23fa 100644 --- a/tests/test_nubase_parse.py +++ b/tests/test_nubase_parse.py @@ -24,8 +24,10 @@ def test_1995_nubase(): "NUBASEMassExcess": [-60085], "NUBASEMassExcessError": [29], "HalfLifeValue": [2.99], - "HalfLifeUnit": ["m"], + "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], + "HalfLifeSeconds": [179.4], + "HalfLifeErrorSeconds": [4.2], "Spin": ["3+"], "DecayModes": ["B-=100"], } @@ -54,8 +56,10 @@ def test_2003_nubase(): "NUBASEMassExcess": [-60070], "NUBASEMassExcessError": [30], "HalfLifeValue": [2.99], - "HalfLifeUnit": ["m"], + "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], + "HalfLifeSeconds": [179.4], + "HalfLifeErrorSeconds": [4.2], "Spin": ["3+"], "DecayModes": ["B-=100"], } @@ -84,8 +88,10 @@ def test_2012_nubase(): "NUBASEMassExcess": [-60060], "NUBASEMassExcessError": [30], "HalfLifeValue": [2.99], - "HalfLifeUnit": ["m"], + "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], + "HalfLifeSeconds": [179.4], + "HalfLifeErrorSeconds": [4.2], "Spin": ["3+"], "DiscoveryYear": [1960], "DecayModes": ["B-=100"], @@ -115,8 +121,10 @@ def test_2016_nubase(): "NUBASEMassExcess": [-60060], "NUBASEMassExcessError": [30], "HalfLifeValue": [2.99], - "HalfLifeUnit": ["m"], + "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], + "HalfLifeSeconds": [179.4], + "HalfLifeErrorSeconds": [4.2], "Spin": ["3+"], "DiscoveryYear": [1960], "DecayModes": ["B-=100"], @@ -147,8 +155,10 @@ def test_2020_nubase(): "NUBASEMassExcess": [-60060], "NUBASEMassExcessError": [30], "HalfLifeValue": [2.99], - "HalfLifeUnit": ["m"], + "HalfLifeUnit": ["min"], "HalfLifeError": [0.07], + "HalfLifeSeconds": [179.4], + "HalfLifeErrorSeconds": 
[4.2], "Spin": ["3+"], "DiscoveryYear": [1960], "DecayModes": ["B-=100"],