EnzymeML · JR-1991 · Oct 14, 2025 · Oct 9, 2025 · Oct 9, 2025 · Oct 14, 2025
diff --git a/pyenzyme/thinlayers/base.py b/pyenzyme/thinlayers/base.py
@@ -1,8 +1,9 @@
 from abc import ABC, abstractmethod
 from functools import cached_property
-from typing import Dict, List, Optional, Tuple, TypeAlias
+from typing import Dict, List, Optional, Set, Tuple, TypeAlias
 
 import pandas as pd
+from sympy import Symbol, sympify
 
 import pyenzyme as pe
 from pyenzyme.versions import v2
@@ -79,7 +80,9 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum
         enzmldoc = enzmldoc.model_copy(deep=True)
 
         # Collect all species that are explicitly modeled
+        all_species = BaseThinLayer._get_all_species(enzmldoc)
         modeled_species = set()
+        equations = []
 
         # Add species from reactions (reactants and products)
         for reaction in enzmldoc.reactions:
@@ -88,11 +91,24 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum
             )
             modeled_species.update(product.species_id for product in reaction.products)
 
+            if reaction.kinetic_law:
+                equations.append(sympify(reaction.kinetic_law.equation))
+
         # Add species from ODE equations
+        for equation in enzmldoc.equations:
+            if equation.equation_type == v2.EquationType.ODE:
+                equations.append(sympify(equation.equation))
+                modeled_species.add(equation.species_id)
+            elif (
+                equation.equation_type == v2.EquationType.ASSIGNMENT
+                or equation.equation_type == v2.EquationType.INITIAL_ASSIGNMENT
+            ):
+                equations.append(sympify(equation.equation))
+
+        # Find species referenced in equations
+        equation_symbols = {symbol for eq in equations for symbol in eq.free_symbols}
         modeled_species.update(
-            equation.species_id
-            for equation in enzmldoc.equations
-            if equation.equation_type == v2.EquationType.ODE
+            species for species in all_species if Symbol(species) in equation_symbols
         )
 
         if not modeled_species:
@@ -132,6 +148,18 @@ def _remove_unmodeled_species(enzmldoc: v2.EnzymeMLDocument) -> v2.EnzymeMLDocum
 
         return enzmldoc
 
+    @staticmethod
+    def _get_all_species(enzmldoc: v2.EnzymeMLDocument) -> Set[str]:
+        """
+        Gets all species from the EnzymeML document.
+        """
+        return set(
+            species.id
+            for species in enzmldoc.small_molecules
+            + enzmldoc.proteins
+            + enzmldoc.complexes
+        )
+
     @abstractmethod
     def integrate(
         self,

diff --git a/pyenzyme/thinlayers/psyces.py b/pyenzyme/thinlayers/psyces.py
@@ -6,19 +6,19 @@
 
 from __future__ import annotations
 
+import contextlib
+import io
+import os
 from dataclasses import dataclass
 from pathlib import Path
-from joblib import Parallel, delayed
+from typing import Dict, List, Optional, Tuple
+
 import dill
 import numpy as np
 import pandas as pd
-import os
-import contextlib
-import io
-
-from typing import Dict, List, Optional, Tuple
+from joblib import Parallel, delayed
 
-from pyenzyme.thinlayers.base import BaseThinLayer, SimResult, Time, InitCondDict
+from pyenzyme.thinlayers.base import BaseThinLayer, InitCondDict, SimResult, Time
 from pyenzyme.versions import v2
 
 try:
@@ -242,7 +242,6 @@ def optimize(self, method="leastsq"):
 
         return result
 
-
     def write(self) -> v2.EnzymeMLDocument:
         """
         Creates a new EnzymeML document with optimized parameter values.
@@ -297,9 +296,9 @@ def _initialize_parameters(self):
         for param in self.enzmldoc.parameters:
             # Build kwargs dictionary with conditional assignments
             kwargs = {
-                **({'min': param.lower_bound} if param.lower_bound is not None else {}),
-                **({'max': param.upper_bound} if param.upper_bound is not None else {}),
-                **({'vary': param.fit}),
+                **({"min": param.lower_bound} if param.lower_bound is not None else {}),
+                **({"max": param.upper_bound} if param.upper_bound is not None else {}),
+                **({"vary": param.fit}),
             }
 
             # Determine parameter value
@@ -542,10 +541,13 @@ def to_pysces_model(self, model: pysces.model):
         """
         model = dill.loads(dill.dumps(model))
         model.sim_time = np.array(self.time)
-        model.__dict__.update(
-            {
-                f"{species_id}_init": value if value != 0 else 1.0e-9
-                for species_id, value in self.species.items()
-            }
-        )
+
+        for species, value in self.species.items():
+            if hasattr(model, f"{species}_init"):
+                setattr(model, f"{species}_init", value)
+            elif hasattr(model, species):
+                setattr(model, species, value)
+            else:
+                raise ValueError(f"Species {species} not found in model")
+
         return model
diff --git a/tests/unit/test_tabular.py b/tests/unit/test_tabular.py
@@ -80,6 +80,10 @@ def test_csv_import(self):
             assert len(m.species_data[1].time) == 11, (
                 f"Expected 10 time points. Got {len(m.species_data[1].time)}"
             )
+
+            assert m.species_data[0].data_unit is not None, (
+                f"Expected data unit. Got {m.species_data[0].data_unit}"
+            )
             assert m.species_data[0].data_unit.name == "mmol / l", (
                 f"Expected mM. Got {m.species_data[0].data_unit}"
             )
@@ -93,6 +97,9 @@ def test_csv_import(self):
                     f"Expected None. Got {m.species_data[0].time_unit}"
                 )
 
+            assert m.species_data[1].data_unit is not None, (
+                f"Expected data unit. Got {m.species_data[1].data_unit}"
+            )
             assert m.species_data[1].data_unit.name == "mmol / l", (
                 f"Expected mM. Got {m.species_data[1].data_unit.name}"
             )

diff --git a/tests/unit/test_thinlayer.py b/tests/unit/test_thinlayer.py
@@ -1,5 +1,5 @@
 from pyenzyme.thinlayers.base import BaseThinLayer
-from pyenzyme.versions.v2 import EnzymeMLDocument, EquationType
+from pyenzyme.versions.v2 import EnzymeMLDocument, Equation, EquationType
 
 # Mock data for creating test species measurements
 MOCK_DATA = {
@@ -96,6 +96,205 @@ def test_remove_unmodeled_species_odes(self):
             f"Unmodeled species should be removed, but appears in measurements {measurement_has_unmodeled}."
         )
 
+    def test_includes_species_in_ode_equation(self):
+        """
+        Test that species referenced in equations are kept in the document even when
+        they are not defined as ODEs or part of reactions.
+
+        This test verifies that:
+        - A species that appears in an equation (but not in reactions or ODEs) is kept
+        - The species can have initial values in measurements without time course data
+        - The filtering function correctly identifies equation-referenced species as modeled
+        """
+        enzmldoc = self._create_enzmldoc()
+
+        # Add a new species that will only appear in equations (not reactions/ODEs)
+        equation_only_species = enzmldoc.add_to_small_molecules(
+            id="p1",
+            name="Equation Only Species",
+        )
+
+        # Add an initial for the species to each measurement
+        for i, measurement in enumerate(enzmldoc.measurements):
+            measurement.add_to_species_data(
+                species_id=equation_only_species.id,
+                initial=i * 2.0,
+                data=[],
+                time=[],
+            )
+
+        # Add an assignment equation that references this species
+        # This species is NOT an ODE and NOT part of any reaction
+        enzmldoc.add_to_equations(
+            species_id="Product",  # Left side of equation
+            equation_type=EquationType.ODE,
+            equation="p1 * 2.0",  # p1 appears in the equation
+        )
+
+        # Verify the species exists before filtering
+        assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
+
+        # Remove unmodeled species
+        thinlayer = MockThinLayer(enzmldoc)
+        tl_enzmldoc = thinlayer.optimize()
+
+        assert equation_only_species.id in [
+            s.id for s in tl_enzmldoc.small_molecules
+        ], (
+            f"Species '{equation_only_species.id}' should be kept because it appears in an equation, "
+            f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
+        )
+
+    def test_includes_species_in_assignment_equation(self):
+        """
+        Test that species referenced in equations are kept in the document even when
+        they are not defined as ODEs or part of reactions.
+
+        This test verifies that:
+        - A species that appears in an equation (but not in reactions or ODEs) is kept
+        - The species can have initial values in measurements without time course data
+        - The filtering function correctly identifies equation-referenced species as modeled
+        """
+        enzmldoc = self._create_enzmldoc()
+
+        # Add a new species that will only appear in equations (not reactions/ODEs)
+        equation_only_species = enzmldoc.add_to_small_molecules(
+            id="p1",
+            name="Equation Only Species",
+        )
+
+        # Add an initial for the species to each measurement
+        for i, measurement in enumerate(enzmldoc.measurements):
+            measurement.add_to_species_data(
+                species_id=equation_only_species.id,
+                initial=i * 2.0,
+                data=[],
+                time=[],
+            )
+
+        # Add an assignment equation that references this species
+        # This species is NOT an ODE and NOT part of any reaction
+        enzmldoc.add_to_equations(
+            species_id="Product",  # Left side of equation
+            equation_type=EquationType.ASSIGNMENT,
+            equation="p1 * 2.0",  # p1 appears in the equation
+        )
+
+        # Verify the species exists before filtering
+        assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
+
+        # Remove unmodeled species
+        thinlayer = MockThinLayer(enzmldoc)
+        tl_enzmldoc = thinlayer.optimize()
+
+        assert equation_only_species.id in [
+            s.id for s in tl_enzmldoc.small_molecules
+        ], (
+            f"Species '{equation_only_species.id}' should be kept because it appears in an equation, "
+            f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
+        )
+
+    def test_includes_species_in_initial_assignment_equation(self):
+        """
+        Test that species referenced in equations are kept in the document even when
+        they are not defined as ODEs or part of reactions.
+
+        This test verifies that:
+        - A species that appears in an equation (but not in reactions or ODEs) is kept
+        - The species can have initial values in measurements without time course data
+        - The filtering function correctly identifies equation-referenced species as modeled
+        """
+        enzmldoc = self._create_enzmldoc()
+
+        # Add a new species that will only appear in equations (not reactions/ODEs)
+        equation_only_species = enzmldoc.add_to_small_molecules(
+            id="p1",
+            name="Equation Only Species",
+        )
+
+        # Add an initial for the species to each measurement
+        for i, measurement in enumerate(enzmldoc.measurements):
+            measurement.add_to_species_data(
+                species_id=equation_only_species.id,
+                initial=i * 2.0,
+                data=[],
+                time=[],
+            )
+
+        # Add an assignment equation that references this species
+        # This species is NOT an ODE and NOT part of any reaction
+        enzmldoc.add_to_equations(
+            species_id="Product",  # Left side of equation
+            equation_type=EquationType.INITIAL_ASSIGNMENT,
+            equation="p1 * 2.0",  # p1 appears in the equation
+        )
+
+        # Verify the species exists before filtering
+        assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
+
+        # Remove unmodeled species
+        thinlayer = MockThinLayer(enzmldoc)
+        tl_enzmldoc = thinlayer.optimize()
+
+        assert equation_only_species.id in [
+            s.id for s in tl_enzmldoc.small_molecules
+        ], (
+            f"Species '{equation_only_species.id}' should be kept because it appears in an equation, "
+            f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
+        )
+
+    def test_includes_species_in_kinetic_law_equation(self):
+        """
+        Test that species referenced in equations are kept in the document even when
+        they are not defined as ODEs or part of reactions.
+
+        This test verifies that:
+        - A species that appears in an equation (but not in reactions or ODEs) is kept
+        - The species can have initial values in measurements without time course data
+        - The filtering function correctly identifies equation-referenced species as modeled
+        """
+        enzmldoc = self._create_enzmldoc()
+
+        # Add a new species that will only appear in equations (not reactions/ODEs)
+        equation_only_species = enzmldoc.add_to_small_molecules(
+            id="p1",
+            name="Equation Only Species",
+        )
+
+        # Add an initial for the species to each measurement
+        for i, measurement in enumerate(enzmldoc.measurements):
+            measurement.add_to_species_data(
+                species_id=equation_only_species.id,
+                initial=i * 2.0,
+                data=[],
+                time=[],
+            )
+
+        # Add a reaction that references this species
+        # This species is NOT an ODE and NOT part of any reaction
+        reaction = enzmldoc.add_to_reactions(id="R1", name="R1")
+        reaction.add_to_reactants(species_id="Substrate", stoichiometry=1)
+        reaction.add_to_products(species_id="Product", stoichiometry=1)
+        reaction.kinetic_law = Equation(
+            equation="p1 * 2.0",
+            equation_type=EquationType.RATE_LAW,
+            species_id="v",
+        )
+
+        # Verify the species exists before filtering
+        assert equation_only_species.id in [s.id for s in enzmldoc.small_molecules]
+
+        # Remove unmodeled species
+        thinlayer = MockThinLayer(enzmldoc)
+        tl_enzmldoc = thinlayer.optimize()
+
+        assert equation_only_species.id in [
+            s.id for s in tl_enzmldoc.small_molecules
+        ], (
+            f"Species '{equation_only_species.id}' should be kept because it appears in a reaction, "
+            f"but it was removed. Remaining species: {[s.id for s in tl_enzmldoc.small_molecules]}"
+        )
+
     def _create_enzmldoc(self) -> EnzymeMLDocument:
         """
         Create a test EnzymeML document with various measurement scenarios.
@@ -116,7 +315,7 @@ def _create_enzmldoc(self) -> EnzymeMLDocument:
         # Add small molecules
         substrate = enzmldoc.add_to_small_molecules(id="Substrate", name="Substrate")
         product = enzmldoc.add_to_small_molecules(id="Product", name="Product")
-        unmodeled = enzmldoc.add_to_small_molecules(id="Unmodeled", name="Unmodeled")
+        unmodeled = enzmldoc.add_to_small_molecules(id="s1", name="Unmodeled")
 
         # Add a measurement with unmodeled species
         measurement = enzmldoc.add_to_measurements(id="M1", name="M1")