diff --git a/pyenzyme/thinlayers/base.py b/pyenzyme/thinlayers/base.py index a814cd1..9eb6346 100644 --- a/pyenzyme/thinlayers/base.py +++ b/pyenzyme/thinlayers/base.py @@ -1,8 +1,9 @@ from abc import ABC, abstractmethod from functools import cached_property -from typing import Dict, List, Optional, Tuple, TypeAlias +from typing import Dict, List, Optional, Set, Tuple, TypeAlias import pandas as pd +from sympy import Symbol, sympify import pyenzyme as pe from pyenzyme.versions import v2 @@ -79,7 +80,9 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum enzmldoc = enzmldoc.model_copy(deep=True) # Collect all species that are explicitly modeled + all_species = BaseThinLayer._get_all_species(enzmldoc) modeled_species = set() + equations = [] # Add species from reactions (reactants and products) for reaction in enzmldoc.reactions: @@ -88,11 +91,24 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum ) modeled_species.update(product.species_id for product in reaction.products) + if reaction.kinetic_law: + equations.append(sympify(reaction.kinetic_law.equation)) + # Add species from ODE equations + for equation in enzmldoc.equations: + if equation.equation_type == v2.EquationType.ODE: + equations.append(sympify(equation.equation)) + modeled_species.add(equation.species_id) + elif ( + equation.equation_type == v2.EquationType.ASSIGNMENT + or equation.equation_type == v2.EquationType.INITIAL_ASSIGNMENT + ): + equations.append(sympify(equation.equation)) + + # Find species referenced in equations + equation_symbols = {symbol for eq in equations for symbol in eq.free_symbols} modeled_species.update( - equation.species_id - for equation in enzmldoc.equations - if equation.equation_type == v2.EquationType.ODE + species for species in all_species if Symbol(species) in equation_symbols ) if not modeled_species: @@ -132,6 +148,18 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum return enzmldoc + @staticmethod + def _get_all_species(enzmldoc: v2.EnzymeMLDocument) -> Set[str]: + """ + Gets all species from the EnzymeML document. + """ + return set( + species.id + for species in enzmldoc.small_molecules + + enzmldoc.proteins + + enzmldoc.complexes + ) + @abstractmethod def integrate( self, diff --git a/pyenzyme/thinlayers/psyces.py b/pyenzyme/thinlayers/psyces.py index f7d2115..caacf79 100644 --- a/pyenzyme/thinlayers/psyces.py +++ b/pyenzyme/thinlayers/psyces.py @@ -6,19 +6,19 @@ from __future__ import annotations +import contextlib +import io +import os from dataclasses import dataclass from pathlib import Path -from joblib import Parallel, delayed +from typing import Dict, List, Optional, Tuple + import dill import numpy as np import pandas as pd -import os -import contextlib -import io - -from typing import Dict, List, Optional, Tuple +from joblib import Parallel, delayed -from pyenzyme.thinlayers.base import BaseThinLayer, SimResult, Time, InitCondDict +from pyenzyme.thinlayers.base import BaseThinLayer, InitCondDict, SimResult, Time from pyenzyme.versions import v2 try: @@ -242,7 +242,6 @@ def optimize(self, method="leastsq"): return result - def write(self) -> v2.EnzymeMLDocument: """ Creates a new EnzymeML document with optimized parameter values. @@ -297,9 +296,9 @@ def _initialize_parameters(self): for param in self.enzmldoc.parameters: # Build kwargs dictionary with conditional assignments kwargs = { - **({'min': param.lower_bound} if param.lower_bound is not None else {}), - **({'max': param.upper_bound} if param.upper_bound is not None else {}), - **({'vary': param.fit}), + **({"min": param.lower_bound} if param.lower_bound is not None else {}), + **({"max": param.upper_bound} if param.upper_bound is not None else {}), + **({"vary": param.fit}), } # Determine parameter value @@ -542,10 +541,13 @@ def to_pysces_model(self, model: pysces.model): """ model = dill.loads(dill.dumps(model)) model.sim_time = np.array(self.time) - model.__dict__.update( - { - f"{species_id}_init": value if value != 0 else 1.0e-9 - for species_id, value in self.species.items() - } - ) + + for species, value in self.species.items(): + if hasattr(model, f"{species}_init"): + setattr(model, f"{species}_init", value) + elif hasattr(model, species): + setattr(model, species, value) + else: + raise ValueError(f"Species {species} not found in model") + return model diff --git a/tests/unit/test_tabular.py b/tests/unit/test_tabular.py index 13d786c..1635fb5 100644 --- a/tests/unit/test_tabular.py +++ b/tests/unit/test_tabular.py @@ -80,6 +80,10 @@ def test_csv_import(self): assert len(m.species_data[1].time) == 11, ( f"Expected 10 time points. Got {len(m.species_data[1].time)}" ) + + assert m.species_data[0].data_unit is not None, ( + f"Expected data unit. Got {m.species_data[0].data_unit}" + ) assert m.species_data[0].data_unit.name == "mmol / l", ( f"Expected mM. Got {m.species_data[0].data_unit}" ) @@ -93,6 +97,9 @@ def test_csv_import(self): f"Expected None. Got {m.species_data[0].time_unit}" ) + assert m.species_data[1].data_unit is not None, ( + f"Expected data unit. Got {m.species_data[1].data_unit}" + ) assert m.species_data[1].data_unit.name == "mmol / l", ( f"Expected mM. Got {m.species_data[1].data_unit.name}" ) diff --git a/tests/unit/test_thinlayer.py b/tests/unit/test_thinlayer.py index e232234..94d2c12 100644 --- a/tests/unit/test_thinlayer.py +++ b/tests/unit/test_thinlayer.py @@ -1,5 +1,5 @@ from pyenzyme.thinlayers.base import BaseThinLayer -from pyenzyme.versions.v2 import EnzymeMLDocument, EquationType +from pyenzyme.versions.v2 import EnzymeMLDocument, Equation, EquationType # Mock data for creating test species measurements MOCK_DATA = { @@ -96,6 +96,205 @@ def test_remove_unmodeled_species_odes(self): f"Unmodeled species should be removed, but appears in measurements {measurement_has_unmodeled}." ) + def test_includes_species_in_ode_equation(self): + """ + Test that species referenced in equations are kept in the document even when + they are not defined as ODEs or part of reactions. + + This test verifies that: + - A species that appears in an equation (but not in reactions or ODEs) is kept + - The species can have initial values in measurements without time course data + - The filtering function correctly identifies equation-referenced species as modeled + """ + enzmldoc = self._create_enzmldoc() + + # Add a new species that will only appear in equations (not reactions/ODEs) + equation_only_species = enzmldoc.add_to_small_molecules( + id="p1", + name="Equation Only Species", + ) + + # Add an initial for the species to each measurement + for i, measurement in enumerate(enzmldoc.measurements): + measurement.add_to_species_data( + species_id=equation_only_species.id, + initial=i * 2.0, + data=[], + time=[], + ) + + # Add an assignment equation that references this species + # This species is NOT an ODE and NOT part of any reaction + enzmldoc.add_to_equations( + species_id="Product", # Left side of equation + equation_type=EquationType.ODE, + equation="p1 * 2.0", # p1 appears in the equation + ) + + # Verify the species exists before filtering + assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules] + + # Remove unmodeled species + thinlayer = MockThinLayer(enzmldoc) + tl_enzmldoc = thinlayer.optimize() + + assert equation_only_species.id in [ + s.id for s in tl_enzmldoc.small_molecules + ], ( + f"Species '{equation_only_species.id}' should be kept because it appears in an equation, " + f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}" + ) + + def test_includes_species_in_assignment_equation(self): + """ + Test that species referenced in equations are kept in the document even when + they are not defined as ODEs or part of reactions. + + This test verifies that: + - A species that appears in an equation (but not in reactions or ODEs) is kept + - The species can have initial values in measurements without time course data + - The filtering function correctly identifies equation-referenced species as modeled + """ + enzmldoc = self._create_enzmldoc() + + # Add a new species that will only appear in equations (not reactions/ODEs) + equation_only_species = enzmldoc.add_to_small_molecules( + id="p1", + name="Equation Only Species", + ) + + # Add an initial for the species to each measurement + for i, measurement in enumerate(enzmldoc.measurements): + measurement.add_to_species_data( + species_id=equation_only_species.id, + initial=i * 2.0, + data=[], + time=[], + ) + + # Add an assignment equation that references this species + # This species is NOT an ODE and NOT part of any reaction + enzmldoc.add_to_equations( + species_id="Product", # Left side of equation + equation_type=EquationType.ASSIGNMENT, + equation="p1 * 2.0", # p1 appears in the equation + ) + + # Verify the species exists before filtering + assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules] + + # Remove unmodeled species + thinlayer = MockThinLayer(enzmldoc) + tl_enzmldoc = thinlayer.optimize() + + assert equation_only_species.id in [ + s.id for s in tl_enzmldoc.small_molecules + ], ( + f"Species '{equation_only_species.id}' should be kept because it appears in an equation, " + f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}" + ) + + def test_includes_species_in_initial_assignment_equation(self): + """ + Test that species referenced in equations are kept in the document even when + they are not defined as ODEs or part of reactions. + + This test verifies that: + - A species that appears in an equation (but not in reactions or ODEs) is kept + - The species can have initial values in measurements without time course data + - The filtering function correctly identifies equation-referenced species as modeled + """ + enzmldoc = self._create_enzmldoc() + + # Add a new species that will only appear in equations (not reactions/ODEs) + equation_only_species = enzmldoc.add_to_small_molecules( + id="p1", + name="Equation Only Species", + ) + + # Add an initial for the species to each measurement + for i, measurement in enumerate(enzmldoc.measurements): + measurement.add_to_species_data( + species_id=equation_only_species.id, + initial=i * 2.0, + data=[], + time=[], + ) + + # Add an assignment equation that references this species + # This species is NOT an ODE and NOT part of any reaction + enzmldoc.add_to_equations( + species_id="Product", # Left side of equation + equation_type=EquationType.INITIAL_ASSIGNMENT, + equation="p1 * 2.0", # p1 appears in the equation + ) + + # Verify the species exists before filtering + assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules] + + # Remove unmodeled species + thinlayer = MockThinLayer(enzmldoc) + tl_enzmldoc = thinlayer.optimize() + + assert equation_only_species.id in [ + s.id for s in tl_enzmldoc.small_molecules + ], ( + f"Species '{equation_only_species.id}' should be kept because it appears in an equation, " + f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}" + ) + + def test_includes_species_in_kinetic_law_equation(self): + """ + Test that species referenced in equations are kept in the document even when + they are not defined as ODEs or part of reactions. + + This test verifies that: + - A species that appears in an equation (but not in reactions or ODEs) is kept + - The species can have initial values in measurements without time course data + - The filtering function correctly identifies equation-referenced species as modeled + """ + enzmldoc = self._create_enzmldoc() + + # Add a new species that will only appear in equations (not reactions/ODEs) + equation_only_species = enzmldoc.add_to_small_molecules( + id="p1", + name="Equation Only Species", + ) + + # Add an initial for the species to each measurement + for i, measurement in enumerate(enzmldoc.measurements): + measurement.add_to_species_data( + species_id=equation_only_species.id, + initial=i * 2.0, + data=[], + time=[], + ) + + # Add a reaction that references this species + # This species is NOT an ODE and NOT part of any reaction + reaction = enzmldoc.add_to_reactions(id="R1", name="R1") + reaction.add_to_reactants(species_id="Substrate", stoichiometry=1) + reaction.add_to_products(species_id="Product", stoichiometry=1) + reaction.kinetic_law = Equation( + equation="p1 * 2.0", + equation_type=EquationType.RATE_LAW, + species_id="v", + ) + + # Verify the species exists before filtering + assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules] + + # Remove unmodeled species + thinlayer = MockThinLayer(enzmldoc) + tl_enzmldoc = thinlayer.optimize() + + assert equation_only_species.id in [ + s.id for s in tl_enzmldoc.small_molecules + ], ( + f"Species '{equation_only_species.id}' should be kept because it appears in a reaction, " + f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}" + ) + def _create_enzmldoc(self) -> EnzymeMLDocument: """ Create a test EnzymeML document with various measurement scenarios. @@ -116,7 +315,7 @@ def _create_enzmldoc(self) -> EnzymeMLDocument: # Add small molecules substrate = enzmldoc.add_to_small_molecules(id="Substrate", name="Substrate") product = enzmldoc.add_to_small_molecules(id="Product", name="Product") - unmodeled = enzmldoc.add_to_small_molecules(id="Unmodeled", name="Unmodeled") + unmodeled = enzmldoc.add_to_small_molecules(id="s1", name="Unmodeled") # Add a measurement with unmodeled species measurement = enzmldoc.add_to_measurements(id="M1", name="M1")