diff --git a/.codecov.yml b/.codecov.yml
index bf866b089c..c9dc0d1f29 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -4,3 +4,4 @@ ignore:
   - "libensemble/tools/live_data/*"
   - "libensemble/sim_funcs/executor_hworld.py"
   - "libensemble/gen_funcs/persistent_tasmanian.py"
+  - "libensemble/gen_classes/gpCAM.py"
diff --git a/.flake8 b/.flake8
index 073989807d..87d249502e 100644
--- a/.flake8
+++ b/.flake8
@@ -38,6 +38,7 @@ per-file-ignores =

     # Need to set something before the APOSMM import
     libensemble/tests/regression_tests/test_persistent_aposmm*:E402
+    libensemble/tests/regression_tests/test_asktell_aposmm_nlopt.py:E402
     libensemble/tests/regression_tests/test_persistent_gp_multitask_ax.py:E402
     libensemble/tests/functionality_tests/test_uniform_sampling_then_persistent_localopt_runs.py:E402
     libensemble/tests/functionality_tests/test_stats_output.py:E402
diff --git a/.github/workflows/extra.yml b/.github/workflows/extra.yml
index 25dda70476..47f49ae814 100644
--- a/.github/workflows/extra.yml
+++ b/.github/workflows/extra.yml
@@ -101,11 +101,9 @@ jobs:
         pip install -r install/misc_feature_requirements.txt
         source install/install_ibcdfo.sh
         conda install numpy scipy
-
-    - name: Install libEnsemble, flake8, lock environment
-      run: |
-        pip install -e .
-        flake8 libensemble
+        conda install -c conda-forge pytorch-cpu
+        pip install --upgrade-strategy=only-if-needed git+https://github.com/xopt-org/xopt.git@generator_standard
+        pip install --no-deps git+https://github.com/optimas-org/optimas.git@main

     - name: Remove test using octave, gpcam on Python 3.13
       if: matrix.python-version >= '3.13'
@@ -113,6 +111,13 @@ jobs:
         rm ./libensemble/tests/regression_tests/test_persistent_fd_param_finder.py # needs octave, which doesn't yet support 3.13
         rm ./libensemble/tests/regression_tests/test_persistent_aposmm_external_localopt.py # needs octave, which doesn't yet support 3.13
         rm ./libensemble/tests/regression_tests/test_gpCAM.py # needs gpcam, which doesn't build on 3.13
+        rm ./libensemble/tests/regression_tests/test_asktell_gpCAM.py # needs gpcam, which doesn't build on 3.13
+
+    - name: Install libEnsemble, flake8
+      run: |
+        pip install git+https://github.com/campa-consortium/gest-api@main
+        pip install -e .
+        flake8 libensemble

     - name: Install redis/proxystore
       run: |
diff --git a/docs/function_guides/ask_tell_generator.rst b/docs/function_guides/ask_tell_generator.rst
new file mode 100644
index 0000000000..73f97124c3
--- /dev/null
+++ b/docs/function_guides/ask_tell_generator.rst
@@ -0,0 +1,21 @@
+
+Ask/Tell Generators
+===================
+
+**BETA - SUBJECT TO CHANGE**
+
+These generators, implementations, methods, and subclasses are in BETA, and
+may change in future releases.
+
+The Generator interface is expected to roughly correspond with CAMPA's standard:
+https://github.com/campa-consortium/gest-api
+
+libEnsemble is in the process of supporting generator objects that implement the following interface:
+
+.. automodule:: generators
+   :members: Generator, LibensembleGenerator
+   :undoc-members:
+
+..
autoclass:: Generator + :member-order: bysource + :members: diff --git a/docs/function_guides/function_guide_index.rst b/docs/function_guides/function_guide_index.rst index 621bf36d27..0539e24c60 100644 --- a/docs/function_guides/function_guide_index.rst +++ b/docs/function_guides/function_guide_index.rst @@ -13,6 +13,7 @@ These guides describe common development patterns and optional components: :caption: Writing User Functions generator + ask_tell_generator simulator allocator sim_gen_alloc_api diff --git a/docs/index.rst b/docs/index.rst index 9f7093ff10..7182746ab0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -26,6 +26,7 @@ tutorials/gpcam_tutorial tutorials/aposmm_tutorial tutorials/calib_cancel_tutorial + tutorials/xopt_bayesian_gen .. toctree:: :maxdepth: 1 diff --git a/docs/tutorials/tutorials.rst b/docs/tutorials/tutorials.rst index 6b40e4b658..cee04fe523 100644 --- a/docs/tutorials/tutorials.rst +++ b/docs/tutorials/tutorials.rst @@ -9,3 +9,4 @@ Tutorials gpcam_tutorial aposmm_tutorial calib_cancel_tutorial + xopt_bayesian_gen diff --git a/docs/tutorials/xopt_bayesian_gen.rst b/docs/tutorials/xopt_bayesian_gen.rst new file mode 100644 index 0000000000..a4b99a3b02 --- /dev/null +++ b/docs/tutorials/xopt_bayesian_gen.rst @@ -0,0 +1,171 @@ +Bayesian Optimization with Xopt +=============================== + +**Requires**: libensemble, xopt, gest-api + +This tutorial demonstrates using Xopt's Bayesian **ExpectedImprovementGenerator** with libEnsemble. + +We'll show two approaches: + +1. Using an xopt-style simulator (callable function) +2. Using a libEnsemble-style simulator function + +|Open in Colab| + +Imports +------- + +.. code-block:: python + + import numpy as np + from gest_api.vocs import VOCS + from xopt.generators.bayesian.expected_improvement import ExpectedImprovementGenerator + + from libensemble import Ensemble + from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f + from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + +Simulator Function +------------------ + +First, we define the xopt-style simulator function. + +This is a basic function just to show how it works. + +.. code-block:: python + + def test_callable(input_dict: dict) -> dict: + """Single-objective callable test function""" + assert isinstance(input_dict, dict) + x1 = input_dict["x1"] + x2 = input_dict["x2"] + y1 = x2 + c1 = x1 + return {"y1": y1, "c1": c1} + +Setup +----- + +Define the VOCS specification and set up the generator. + +.. code-block:: python + + libE_specs = LibeSpecs(gen_on_manager=True, nworkers=4) + + vocs = VOCS( + variables={"x1": [0, 1.0], "x2": [0, 10.0]}, + objectives={"y1": "MINIMIZE"}, + constraints={"c1": ["GREATER_THAN", 0.5]}, + constants={"constant1": 1.0}, + ) + + gen = ExpectedImprovementGenerator(vocs=vocs) + + # Create 4 initial points and ingest them + initial_points = [ + {"x1": 0.2, "x2": 2.0, "y1": 2.0, "c1": 0.2}, + {"x1": 0.5, "x2": 5.0, "y1": 5.0, "c1": 0.5}, + {"x1": 0.7, "x2": 7.0, "y1": 7.0, "c1": 0.7}, + {"x1": 0.9, "x2": 9.0, "y1": 9.0, "c1": 0.9}, + ] + gen.ingest(initial_points) + +Define libEnsemble specifications. Note the gen_specs and sim_specs are set using vocs. + +Approach 1: Using Xopt-style Simulator (Callable Function) +----------------------------------------------------------- + +The simulator is a simple callable function that takes a dictionary of inputs and returns a dictionary of outputs. + +.. 
code-block:: python + + gen_specs = GenSpecs( + generator=gen, + vocs=vocs, + ) + + # Note: using 'simulator' parameter for xopt-style callable + sim_specs = SimSpecs( + simulator=test_callable, + vocs=vocs, + ) + + alloc_specs = AllocSpecs(alloc_f=alloc_f) + exit_criteria = ExitCriteria(sim_max=12) + + workflow = Ensemble( + libE_specs=libE_specs, + sim_specs=sim_specs, + alloc_specs=alloc_specs, + gen_specs=gen_specs, + exit_criteria=exit_criteria, + ) + + H, _, _ = workflow.run() + + if workflow.is_manager: + print(f"Completed {len(H)} simulations") + print(H[["x1", "x2", "y1", "c1"]]) + assert np.array_equal(H["y1"], H["x2"]) + assert np.array_equal(H["c1"], H["x1"]) + +Approach 2: Using libEnsemble-style Simulator Function +------------------------------------------------------- + +Now we define the libEnsemble-style simulator function and use it in the workflow. + +.. code-block:: python + + def test_sim(H, persis_info, sim_specs, _): + """ + Simple sim function that takes x1, x2, constant1 from H and returns y1, c1. + Logic: y1 = x2, c1 = x1 + """ + batch = len(H) + H_o = np.zeros(batch, dtype=sim_specs["out"]) + + for i in range(batch): + x1 = H["x1"][i] + x2 = H["x2"][i] + H_o["y1"][i] = x2 + H_o["c1"][i] = x1 + + return H_o, persis_info + +Reset generator and change to libEnsemble-style simulator: + +.. code-block:: python + + # Reset generator and change to libEnsemble-style simulator + gen = ExpectedImprovementGenerator(vocs=vocs) + gen.ingest(initial_points) + + gen_specs = GenSpecs( + generator=gen, + vocs=vocs, + ) + + # Note: using 'sim_f' parameter for libEnsemble-style function + sim_specs = SimSpecs( + sim_f=test_sim, + vocs=vocs, + ) + + workflow = Ensemble( + libE_specs=libE_specs, + sim_specs=sim_specs, + alloc_specs=alloc_specs, + gen_specs=gen_specs, + exit_criteria=exit_criteria, + ) + + H, _, _ = workflow.run() + + if workflow.is_manager: + print(f"Completed {len(H)} simulations") + print(H[["x1", "x2", "y1", "c1"]]) + assert np.array_equal(H["y1"], H["x2"]) + assert np.array_equal(H["c1"], H["x1"]) + +.. |Open in Colab| image:: https://colab.research.google.com/assets/colab-badge.svg + :target: http://colab.research.google.com/github/Libensemble/libensemble/blob/examples/xopt_generators/examples/tutorials/xopt_bayesian_gen/xopt_EI_example.ipynb diff --git a/examples/tutorials/xopt_bayesian_gen/xopt_EI_example.ipynb b/examples/tutorials/xopt_bayesian_gen/xopt_EI_example.ipynb new file mode 100644 index 0000000000..cb5730c8a2 --- /dev/null +++ b/examples/tutorials/xopt_bayesian_gen/xopt_EI_example.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Xopt Expected Improvement Generator Example\n", + "\n", + "**Requires**: libensemble, xopt, gest-api\n", + "\n", + "This notebook demonstrates using Xopt's Bayeisan **ExpectedImprovementGenerator** with libEnsemble.\n", + "We'll show two approaches:\n", + "1. Using an xopt-style simulator (callable function)\n", + "2. 
Using a libEnsemble-style simulator function\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from gest_api.vocs import VOCS\n", + "from xopt.generators.bayesian.expected_improvement import ExpectedImprovementGenerator\n", + "\n", + "from libensemble import Ensemble\n", + "from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f\n", + "from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Simulator Function\n", + "\n", + "First, we define the xopt-style simulator function.\n", + "\n", + "This is a basic function just to show how it works.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def test_callable(input_dict: dict) -> dict:\n", + " \"\"\"Single-objective callable test function\"\"\"\n", + " assert isinstance(input_dict, dict)\n", + " x1 = input_dict[\"x1\"]\n", + " x2 = input_dict[\"x2\"]\n", + " y1 = x2\n", + " c1 = x1\n", + " return {\"y1\": y1, \"c1\": c1}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "Define the VOCS specification and set up the generator.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "libE_specs = LibeSpecs(gen_on_manager=True, nworkers=4)\n", + "\n", + "vocs = VOCS(\n", + " variables={\"x1\": [0, 1.0], \"x2\": [0, 10.0]},\n", + " objectives={\"y1\": \"MINIMIZE\"},\n", + " constraints={\"c1\": [\"GREATER_THAN\", 0.5]},\n", + " constants={\"constant1\": 1.0},\n", + ")\n", + "\n", + "gen = ExpectedImprovementGenerator(vocs=vocs)\n", + "\n", + "# Create 4 initial points and ingest them\n", + "initial_points = [\n", + " {\"x1\": 0.2, \"x2\": 2.0, \"y1\": 2.0, \"c1\": 0.2},\n", + " {\"x1\": 0.5, \"x2\": 5.0, \"y1\": 5.0, \"c1\": 0.5},\n", + " {\"x1\": 0.7, \"x2\": 7.0, \"y1\": 7.0, \"c1\": 0.7},\n", + " {\"x1\": 0.9, \"x2\": 9.0, \"y1\": 9.0, \"c1\": 0.9},\n", + "]\n", + "gen.ingest(initial_points)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define libEnsemble specifications. Note the gen_specs and sim_specs are set using vocs." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "gen_specs = GenSpecs(\n", + " generator=gen,\n", + " vocs=vocs,\n", + ")\n", + "\n", + "# Note: using 'simulator' parameter for xopt-style callable\n", + "sim_specs = SimSpecs(\n", + " simulator=test_callable,\n", + " vocs=vocs,\n", + ")\n", + "\n", + "alloc_specs = AllocSpecs(alloc_f=alloc_f)\n", + "exit_criteria = ExitCriteria(sim_max=12)\n", + "\n", + "workflow = Ensemble(\n", + " libE_specs=libE_specs,\n", + " sim_specs=sim_specs,\n", + " alloc_specs=alloc_specs,\n", + " gen_specs=gen_specs,\n", + " exit_criteria=exit_criteria,\n", + ")\n", + "\n", + "H, _, _ = workflow.run()\n", + "\n", + "if workflow.is_manager:\n", + " print(f\"Completed {len(H)} simulations\")\n", + " print(H[[\"x1\", \"x2\", \"y1\", \"c1\"]])\n", + " assert np.array_equal(H[\"y1\"], H[\"x2\"])\n", + " assert np.array_equal(H[\"c1\"], H[\"x1\"])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Approach 2: Using libEnsemble-style Simulator Function\n", + "\n", + "Now we define the libEnsemble-style simulator function and use it in the workflow.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def test_sim(H, persis_info, sim_specs, _):\n", + " \"\"\"\n", + " Simple sim function that takes x1, x2, constant1 from H and returns y1, c1.\n", + " Logic: y1 = x2, c1 = x1\n", + " \"\"\"\n", + " batch = len(H)\n", + " H_o = np.zeros(batch, dtype=sim_specs[\"out\"])\n", + "\n", + " for i in range(batch):\n", + " x1 = H[\"x1\"][i]\n", + " x2 = H[\"x2\"][i]\n", + " H_o[\"y1\"][i] = x2\n", + " H_o[\"c1\"][i] = x1\n", + "\n", + " return H_o, persis_info" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Reset generator and change to libEnsemble-style simulator\n", + "gen = ExpectedImprovementGenerator(vocs=vocs)\n", + "gen.ingest(initial_points)\n", + "\n", + "gen_specs = GenSpecs(\n", + " generator=gen,\n", + " vocs=vocs,\n", + ")\n", + "\n", + "# Note: using 'sim_f' parameter for libEnsemble-style function\n", + "sim_specs = SimSpecs(\n", + " sim_f=test_sim,\n", + " vocs=vocs,\n", + ")\n", + "\n", + "workflow = Ensemble(\n", + " libE_specs=libE_specs,\n", + " sim_specs=sim_specs,\n", + " alloc_specs=alloc_specs,\n", + " gen_specs=gen_specs,\n", + " exit_criteria=exit_criteria,\n", + ")\n", + "\n", + "H, _, _ = workflow.run()\n", + "\n", + "if workflow.is_manager:\n", + " print(f\"Completed {len(H)} simulations\")\n", + " print(H[[\"x1\", \"x2\", \"y1\", \"c1\"]])\n", + " assert np.array_equal(H[\"y1\"], H[\"x2\"])\n", + " assert np.array_equal(H[\"c1\"], H[\"x1\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/libensemble/__init__.py b/libensemble/__init__.py index 6053368219..8df3af2073 100644 --- a/libensemble/__init__.py +++ b/libensemble/__init__.py @@ -12,3 +12,4 @@ from libensemble import logger from .ensemble import Ensemble +from .generators import Generator diff 
--git a/libensemble/comms/comms.py b/libensemble/comms/comms.py index 51042c4637..52f71dad97 100644 --- a/libensemble/comms/comms.py +++ b/libensemble/comms/comms.py @@ -264,8 +264,8 @@ def __init__(self, main, nworkers, *args, **kwargs): self.inbox = Queue() self.outbox = Queue() super().__init__(self, main, *args, **kwargs) - comm = QComm(self.inbox, self.outbox, nworkers) - self.handle = Process(target=_qcomm_main, args=(comm, main) + args, kwargs=kwargs) + self.comm = QComm(self.inbox, self.outbox, nworkers) + self.handle = Process(target=_qcomm_main, args=(self.comm, main) + args, kwargs=kwargs) def terminate(self, timeout=None): """Terminate the process.""" diff --git a/libensemble/gen_classes/__init__.py b/libensemble/gen_classes/__init__.py new file mode 100644 index 0000000000..d0524159da --- /dev/null +++ b/libensemble/gen_classes/__init__.py @@ -0,0 +1,2 @@ +from .aposmm import APOSMM # noqa: F401 +from .sampling import UniformSample # noqa: F401 diff --git a/libensemble/gen_classes/aposmm.py b/libensemble/gen_classes/aposmm.py new file mode 100644 index 0000000000..b081a1d07f --- /dev/null +++ b/libensemble/gen_classes/aposmm.py @@ -0,0 +1,381 @@ +import copy +import warnings +from math import gamma, pi, sqrt +from typing import List + +import numpy as np +from gest_api.vocs import VOCS +from numpy import typing as npt + +from libensemble.generators import PersistentGenInterfacer +from libensemble.message_numbers import EVAL_GEN_TAG, PERSIS_STOP + + +class APOSMM(PersistentGenInterfacer): + """ + APOSMM coordinates multiple local optimization runs, dramatically reducing time for + discovering multiple minima on parallel systems. + + This *generator* adheres to the `Generator Standard `_. + + .. seealso:: + + `https://doi.org/10.1007/s12532-017-0131-4 `_ + + VOCS variables must include both regular and *_on_cube versions. E.g.,: + + ```python + vars_std = { + "var1": [-10.0, 10.0], + "var2": [0.0, 100.0], + "var3": [1.0, 50.0], + "var1_on_cube": [0, 1.0], + "var2_on_cube": [0, 1.0], + "var3_on_cube": [0, 1.0], + } + variables_mapping = { + "x": ["var1", "var2", "var3"], + "x_on_cube": ["var1_on_cube", "var2_on_cube", "var3_on_cube"], + } + gen = APOSMM(vocs, 3, 3, variables_mapping=variables_mapping, ...) + ``` + + Getting started + --------------- + + APOSMM requires a minimal sample before starting optimization. A random sample across the domain + can either be retrieved via a `suggest()` call right after initialization, or the user can ingest + a set of sample points via `ingest()`. The minimal sample size is specified via the `initial_sample_size` + parameter. This many evaluated sample points *must* be provided to APOSMM before it will provide any + local optimization points. + + ```python + # Approach 1: Retrieve sample points via suggest() + gen = APOSMM(vocs, max_active_runs=2, initial_sample_size=10) + + # ask APOSMM for some sample points + initial_sample = gen.suggest(10) + for point in initial_sample: + point["f"] = func(point["x"]) + gen.ingest(initial_sample) + + # APOSMM will now provide local-optimization points. + points = gen.suggest(10) + + # ---------------- + + # Approach 2: Ingest pre-computed sample points via ingest() + gen = APOSMM(vocs, max_active_runs=2, initial_sample_size=10) + + initial_sample = create_initial_sample() + for point in initial_sample: + point["f"] = func(point["x"]) + + # provide APOSMM with sample points + gen.ingest(initial_sample) + + # APOSMM will now provide local-optimization points. + points = gen.suggest(10) + + ... 
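+    # A possible continuation (illustrative, not part of the original example):
+    # repeat the suggest/evaluate/ingest cycle; local minima found so far can be
+    # retrieved at any point via suggest_updates().
+    for point in points:
+        point["f"] = func(point["x"])
+    gen.ingest(points)
+    minima = gen.suggest_updates()  # list of NumPy arrays of identified local minima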
+ ``` + + *Important Note*: After the initial sample phase, APOSMM cannot accept additional "arbitrary" + sample points that are not associated with local optimization runs. + + ```python + gen = APOSMM(vocs, max_active_runs=2, initial_sample_size=10) + + # ask APOSMM for some sample points + initial_sample = gen.suggest(10) + for point in initial_sample: + point["f"] = func(point["x"]) + gen.ingest(initial_sample) + + # APOSMM will now provide local-optimization points. + points_from_aposmm = gen.suggest(10) + for point in points_from_aposmm: + point["f"] = func(point["x"]) + gen.ingest(points_from_aposmm) + + gen.ingest(another_sample) # THIS CRASHES + ``` + + Parameters + ---------- + vocs: VOCS + The VOCS object, adhering to the VOCS interface from the Generator Standard. + + max_active_runs: int + Bound on number of runs APOSMM is *concurrently* advancing. + + initial_sample_size: int + + Minimal sample points required before starting optimization. + + If `suggest(N)` is called first, APOSMM produces this many random sample points across the domain, + with N <= initial_sample_size. + + If `ingest(sample)` is called first, multiple calls like `ingest(sample)` are required until + the total number of points ingested is >= initial_sample_size. + + ```python + gen = APOSMM(vocs, max_active_runs=2, initial_sample_size=10) + + # ask APOSMM for some sample points + initial_sample = gen.suggest(10) + for point in initial_sample: + point["f"] = func(point["x"]) + gen.ingest(initial_sample) + + # APOSMM will now provide local-optimization points. + points = gen.suggest(10) + ... + ``` + + History: npt.NDArray = [] + An optional history of previously evaluated points. + + sample_points: npt.NDArray = None + Included for compatibility with the underlying algorithm. + Points to be sampled (original domain). + If more sample points are needed by APOSMM during the course of the + optimization, points will be drawn uniformly over the domain. + + localopt_method: str = "scipy_Nelder-Mead" (scipy) or "LN_BOBYQA" (nlopt) + The local optimization method to use. Others being added over time. + + mu: float = 1e-8 + Distance from the boundary that all localopt starting points must satisfy + + nu: float = 1e-8 + Distance from identified minima that all starting points must satisfy + + rk_const: float = None + Multiplier in front of the ``r_k`` value. + If not provided, it will be set to ``0.5 * ((gamma(1 + (n / 2)) * 5) ** (1 / n)) / sqrt(pi)`` + + xtol_abs: float = 1e-6 + Localopt method's convergence tolerance. + + ftol_abs: float = 1e-6 + Localopt method's convergence tolerance. + + opt_return_codes: list[int] = [0] + scipy only: List of return codes that determine if a point should be ruled a local minimum. + + dist_to_bound_multiple: float = 0.5 + What fraction of the distance to the nearest boundary should the initial + step size be in localopt runs. + + random_seed: int = 1 + Seed for the random number generator. + """ + + def _validate_vocs(self, vocs: VOCS): + if len(vocs.constraints): + warnings.warn("APOSMM does not support constraints in VOCS. Ignoring.") + if len(vocs.constants): + warnings.warn("APOSMM does not support constants in VOCS. 
Ignoring.") + + def __init__( + self, + vocs: VOCS, + max_active_runs: int, + initial_sample_size: int, + History: npt.NDArray = [], + sample_points: npt.NDArray = None, + localopt_method: str = "scipy_Nelder-Mead", + rk_const: float = None, + xtol_abs: float = 1e-6, + ftol_abs: float = 1e-6, + opt_return_codes: list[int] = [0], + mu: float = 1e-8, + nu: float = 1e-8, + dist_to_bound_multiple: float = 0.5, + random_seed: int = 1, + **kwargs, + ) -> None: + + from libensemble.gen_funcs.persistent_aposmm import aposmm + + self.VOCS = vocs + + gen_specs = {} + gen_specs["user"] = {} + persis_info = {} + libE_info = {} + gen_specs["gen_f"] = aposmm + n = len(list(vocs.variables.keys())) + + if not rk_const: + rk_const = 0.5 * ((gamma(1 + (n / 2)) * 5) ** (1 / n)) / sqrt(pi) + + FIELDS = [ + "initial_sample_size", + "sample_points", + "localopt_method", + "rk_const", + "xtol_abs", + "ftol_abs", + "mu", + "nu", + "opt_return_codes", + "dist_to_bound_multiple", + "max_active_runs", + "random_seed", + ] + + for k in FIELDS: + val = locals().get(k) + if val is not None: + gen_specs["user"][k] = val + + super().__init__(vocs, History, persis_info, gen_specs, libE_info, **kwargs) + + # Set bounds using the correct x mapping + x_mapping = self.variables_mapping["x"] + self.gen_specs["user"]["lb"] = np.array([vocs.variables[var].domain[0] for var in x_mapping]) + self.gen_specs["user"]["ub"] = np.array([vocs.variables[var].domain[1] for var in x_mapping]) + + x_size = len(self.variables_mapping.get("x", [])) + x_on_cube_size = len(self.variables_mapping.get("x_on_cube", [])) + + try: + assert x_size > 0 and x_on_cube_size > 0 + except AssertionError: + raise ValueError( + """ User must provide a variables_mapping dictionary in the following format: + + variables = {"core": [-3, 3], "edge": [-2, 2], "core_on_cube": [0, 1], "edge_on_cube": [0, 1]} + objectives = {"energy": "MINIMIZE"} + + variables_mapping = { + "x": ["core", "edge"], + "x_on_cube": ["core_on_cube", "edge_on_cube"], + "f": ["energy"], + } + """ + ) + try: + assert x_size == x_on_cube_size + except AssertionError: + raise ValueError( + "Within the variables_mapping dictionary, x and x_on_cube " + + f"must have same length but got {x_size} and {x_on_cube_size}" + ) + + gen_specs["out"] = [ + ("x", float, x_size), + ("x_on_cube", float, x_on_cube_size), + ("sim_id", int), + ("local_min", bool), + ("local_pt", bool), + ] + + gen_specs["persis_in"] = ["sim_id", "x", "x_on_cube", "f", "sim_ended"] + if "components" in kwargs or "components" in gen_specs.get("user", {}): + gen_specs["persis_in"].append("fvec") + + # SH - Need to know if this is gen_on_manager or not. + self.persis_info["nworkers"] = gen_specs["user"].get("max_active_runs") + self.all_local_minima = [] + self._suggest_idx = 0 + self._last_suggest = None + self._ingest_buf = None + self._n_buffd_results = 0 + self._told_initial_sample = False + self._first_called_method = None + self._last_call = None + self._last_num_points = 0 + + def _slot_in_data(self, results): + """Slot in libE_calc_in and trial data into corresponding array fields. 
*Initial sample only!!*""" + self._ingest_buf[self._n_buffd_results : self._n_buffd_results + len(results)] = results + + def _enough_initial_sample(self): + return ( + self._n_buffd_results >= int(self.gen_specs["user"]["initial_sample_size"]) + ) or self._told_initial_sample + + def _ready_to_suggest_genf(self): + """ + We're presumably ready to be suggested IF: + - When we're working on the initial sample: + - We have no _last_suggest cached + - all points given out have returned AND we've been suggested *at least* as many points as we cached + - When we're done with the initial sample: + - we've been suggested *at least* as many points as we cached + - we've just ingested some results + """ + if not self._told_initial_sample and self._last_suggest is not None: + cond = all([i in self._ingest_buf["sim_id"] for i in self._last_suggest["sim_id"]]) + else: + cond = True + return self._last_suggest is None or (cond and (self._suggest_idx >= len(self._last_suggest))) + + def suggest_numpy(self, num_points: int = 0) -> npt.NDArray: + """Request the next set of points to evaluate, as a NumPy array.""" + + if self._first_called_method is None: + self._first_called_method = "suggest" + self.gen_specs["user"]["generate_sample_points"] = True + + if self._ready_to_suggest_genf(): + self._suggest_idx = 0 + if self._last_call == "suggest" and num_points == 0 and self._last_num_points == 0: + self.finalize() + raise RuntimeError("Cannot suggest points since APOSMM is currently expecting to receive a sample") + self._last_suggest = super().suggest_numpy(num_points) + + if self._last_suggest["local_min"].any(): # filter out local minima rows + min_idxs = self._last_suggest["local_min"] + self.all_local_minima.append(self._last_suggest[min_idxs]) + self._last_suggest = self._last_suggest[~min_idxs] + + if num_points > 0: # we've been suggested for a selection of the last suggest + results = np.copy(self._last_suggest[self._suggest_idx : self._suggest_idx + num_points]) + self._suggest_idx += num_points + + else: + results = np.copy(self._last_suggest) + self._last_suggest = None + + self._last_call = "suggest" + self._last_num_points = num_points + return results + + def ingest_numpy(self, results: npt.NDArray, tag: int = EVAL_GEN_TAG) -> None: + + if self._first_called_method is None: + self._first_called_method = "ingest" + self.gen_specs["user"]["generate_sample_points"] = False + + if (results is None and tag == PERSIS_STOP) or self._told_initial_sample: + super().ingest_numpy(results, tag) + self._last_call = "ingest" + return + + # Initial sample buffering here: + + if self._n_buffd_results == 0: + self._ingest_buf = np.zeros(self.gen_specs["user"]["initial_sample_size"], dtype=results.dtype) + + if not self._enough_initial_sample(): + self._slot_in_data(np.copy(results)) + self._n_buffd_results += len(results) + + if self._enough_initial_sample(): + if "sim_id" in results.dtype.names and not self._told_initial_sample: + self._ingest_buf["sim_id"] = range(len(self._ingest_buf)) + super().ingest_numpy(self._ingest_buf, tag) + self._told_initial_sample = True + self._n_buffd_results = 0 + + self._last_call = "ingest" + + def suggest_updates(self) -> List[npt.NDArray]: + """Request a list of NumPy arrays containing entries that have been identified as minima.""" + minima = copy.deepcopy(self.all_local_minima) + self.all_local_minima = [] + return minima diff --git a/libensemble/gen_classes/external/sampling.py b/libensemble/gen_classes/external/sampling.py new file mode 100644 index 
0000000000..3ddb72cb97 --- /dev/null +++ b/libensemble/gen_classes/external/sampling.py @@ -0,0 +1,64 @@ +from gest_api.vocs import VOCS +from gest_api import Generator +import numpy as np + +__all__ = [ + "UniformSample", + "UniformSampleArray", +] + + +class UniformSample(Generator): + """ + This sampler adheres to the gest-api VOCS interface and data structures (no numpy). + + Each variable is a scalar. + """ + + def __init__(self, VOCS: VOCS): + self.VOCS = VOCS + self.rng = np.random.default_rng(1) + super().__init__(VOCS) + + def _validate_vocs(self, VOCS): + assert len(self.VOCS.variables), "VOCS must contain variables." + + def suggest(self, n_trials): + output = [] + for _ in range(n_trials): + trial = {} + for key in self.VOCS.variables.keys(): + trial[key] = self.rng.uniform(self.VOCS.variables[key].domain[0], self.VOCS.variables[key].domain[1]) + output.append(trial) + return output + + def ingest(self, calc_in): + pass # random sample so nothing to tell + + +class UniformSampleArray(Generator): + """ + This sampler adheres to the gest-api VOCS interface and data structures. + + Uses one array variable of any dimension. Array is a numpy array. + """ + + def __init__(self, VOCS: VOCS): + self.VOCS = VOCS + self.rng = np.random.default_rng(1) + super().__init__(VOCS) + + def _validate_vocs(self, VOCS): + assert len(self.VOCS.variables) == 1, "VOCS must contain exactly one variable." + + def suggest(self, n_trials): + output = [] + key = list(self.VOCS.variables.keys())[0] + var = self.VOCS.variables[key] + for _ in range(n_trials): + trial = {key: np.array([self.rng.uniform(bounds[0], bounds[1]) for bounds in var.domain])} + output.append(trial) + return output + + def ingest(self, calc_in): + pass # random sample so nothing to tell diff --git a/libensemble/gen_classes/gpCAM.py b/libensemble/gen_classes/gpCAM.py new file mode 100644 index 0000000000..b544478831 --- /dev/null +++ b/libensemble/gen_classes/gpCAM.py @@ -0,0 +1,153 @@ +"""Generator class exposing gpCAM functionality""" + +import time +from typing import List + +import numpy as np +from gest_api.vocs import VOCS +from gpcam import GPOptimizer as GP +from numpy import typing as npt + +# While there are class / func duplicates - re-use functions. +from libensemble.gen_funcs.persistent_gpCAM import ( + _calculate_grid_distances, + _eval_var, + _find_eligible_points, + _generate_mesh, + _read_testpoints, +) +from libensemble.generators import LibensembleGenerator + +__all__ = [ + "GP_CAM", + "GP_CAM_Covar", +] + + +# Equivalent to function persistent_gpCAM_ask_tell +class GP_CAM(LibensembleGenerator): + """ + This generation function constructs a global surrogate of `f` values. + + It is a batched method that produces a first batch uniformly random from + (lb, ub). On subsequent iterations, it calls an optimization method to + produce the next batch of points. This optimization might be too slow + (relative to the simulation evaluation time) for some use cases. 
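+
+    A minimal standalone sketch (illustrative; ``vocs`` and the evaluation step
+    ``run_sims`` are user-supplied and not part of this module)::
+
+        gen = GP_CAM(vocs, ask_max_iter=10)
+        batch = gen.suggest_numpy(8)   # first batch is uniform random over the domain
+        evaluated = run_sims(batch)    # user step: adds an "f" value for each "x"
+        gen.ingest_numpy(evaluated)    # updates and trains the GP
+        batch = gen.suggest_numpy(8)   # later batches come from the GP's ask()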
+ """ + + def __init__(self, VOCS: VOCS, ask_max_iter: int = 10, random_seed: int = 1, *args, **kwargs): + + super().__init__(VOCS, *args, **kwargs) + self.rng = np.random.default_rng(random_seed) + + self.lb = np.array([VOCS.variables[i].domain[0] for i in VOCS.variables]) + self.ub = np.array([VOCS.variables[i].domain[1] for i in VOCS.variables]) + self.n = len(self.lb) # dimension + self.all_x = np.empty((0, self.n)) + self.all_y = np.empty((0, 1)) + assert isinstance(self.n, int), "Dimension must be an integer" + assert isinstance(self.lb, np.ndarray), "lb must be a numpy array" + assert isinstance(self.ub, np.ndarray), "ub must be a numpy array" + + self.dtype = [("x", float, (self.n))] + + self.my_gp = None + self.noise = 1e-8 # 1e-12 + self.ask_max_iter = ask_max_iter + + def _validate_vocs(self, vocs): + assert len(vocs.variables), "VOCS must contain variables." + assert len(vocs.objectives), "VOCS must contain at least one objective." + + def suggest_numpy(self, n_trials: int) -> npt.NDArray: + if self.all_x.shape[0] == 0: + self.x_new = self.rng.uniform(self.lb, self.ub, (n_trials, self.n)) + else: + start = time.time() + self.x_new = self.my_gp.ask( + input_set=np.column_stack((self.lb, self.ub)), + n=n_trials, + pop_size=n_trials, + acquisition_function="total correlation", + max_iter=self.ask_max_iter, # Larger takes longer. gpCAM default is 20. + )["x"] + print(f"Ask time:{time.time() - start}") + H_o = np.zeros(n_trials, dtype=self.dtype) + H_o["x"] = self.x_new + return H_o + + def ingest_numpy(self, calc_in: npt.NDArray) -> None: + if calc_in is not None: + if "x" in calc_in.dtype.names: # SH should we require x in? + self.x_new = np.atleast_2d(calc_in["x"]) + self.y_new = np.atleast_2d(calc_in["f"]).T + nan_indices = [i for i, fval in enumerate(self.y_new) if np.isnan(fval[0])] + self.x_new = np.delete(self.x_new, nan_indices, axis=0) + self.y_new = np.delete(self.y_new, nan_indices, axis=0) + + self.all_x = np.vstack((self.all_x, self.x_new)) + self.all_y = np.vstack((self.all_y, self.y_new)) + + noise_var = self.noise * np.ones(len(self.all_y)) + if self.my_gp is None: + self.my_gp = GP(self.all_x, self.all_y.flatten(), noise_variances=noise_var) + else: + self.my_gp.tell(self.all_x, self.all_y.flatten(), noise_variances=noise_var) + self.my_gp.train() + + +class GP_CAM_Covar(GP_CAM): + """ + This generation function constructs a global surrogate of `f` values. + + It is a batched method that produces a first batch uniformly random from + (lb, ub) and on following iterations samples the GP posterior covariance + function to find sample points. 
+ """ + + def __init__(self, VOCS, test_points_file: str = None, use_grid: bool = False, *args, **kwargs): + super().__init__(VOCS, *args, **kwargs) + self.test_points = _read_testpoints({"test_points_file": test_points_file}) + self.x_for_var = None + self.var_vals = None + self.use_grid = use_grid + self.persis_info = {} + if self.use_grid: + self.num_points = 10 + self.x_for_var = _generate_mesh(self.lb, self.ub, self.num_points) + self.r_low_init, self.r_high_init = _calculate_grid_distances(self.lb, self.ub, self.num_points) + + def suggest_numpy(self, n_trials: int) -> List[dict]: + if self.all_x.shape[0] == 0: + x_new = self.rng.uniform(self.lb, self.ub, (n_trials, self.n)) + else: + if not self.use_grid: + x_new = self.x_for_var[np.argsort(self.var_vals)[-n_trials:]] + else: + r_high = self.r_high_init + r_low = self.r_low_init + x_new = [] + r_cand = r_high # Let's start with a large radius and stop when we have batchsize points + + sorted_indices = np.argsort(-self.var_vals) + while len(x_new) < n_trials: + x_new = _find_eligible_points(self.x_for_var, sorted_indices, r_cand, n_trials) + if len(x_new) < n_trials: + r_high = r_cand + r_cand = (r_high + r_low) / 2.0 + + self.x_new = x_new + H_o = np.zeros(n_trials, dtype=self.dtype) + H_o["x"] = self.x_new + return H_o + + def ingest_numpy(self, calc_in: npt.NDArray): + if calc_in is not None: + super().ingest_numpy(calc_in) + if not self.use_grid: + n_trials = len(self.y_new) + self.x_for_var = self.rng.uniform(self.lb, self.ub, (10 * n_trials, self.n)) + + self.var_vals = _eval_var( + self.my_gp, self.all_x, self.all_y, self.x_for_var, self.test_points, self.persis_info + ) diff --git a/libensemble/gen_classes/sampling.py b/libensemble/gen_classes/sampling.py new file mode 100644 index 0000000000..5e8102c22b --- /dev/null +++ b/libensemble/gen_classes/sampling.py @@ -0,0 +1,36 @@ +"""Generator classes providing points using sampling""" + +import numpy as np +from gest_api.vocs import VOCS + +from libensemble.generators import LibensembleGenerator + +__all__ = [ + "UniformSample", +] + + +class UniformSample(LibensembleGenerator): + """ + Samples over the domain specified in the VOCS. 
+ """ + + def __init__(self, VOCS: VOCS, random_seed: int = 1, *args, **kwargs): + super().__init__(VOCS, *args, **kwargs) + self.rng = np.random.default_rng(random_seed) + + self.n = len(list(self.VOCS.variables.keys())) + self.np_dtype = [("x", float, (self.n))] + self.lb = np.array([VOCS.variables[i].domain[0] for i in VOCS.variables]) + self.ub = np.array([VOCS.variables[i].domain[1] for i in VOCS.variables]) + + def suggest_numpy(self, n_trials): + out = np.zeros(n_trials, dtype=self.np_dtype) + + for i in range(n_trials): + out[i]["x"] = self.rng.uniform(self.lb, self.ub, (self.n)) + + return out + + def ingest_numpy(self, calc_in): + pass # random sample so nothing to tell diff --git a/libensemble/gen_funcs/aposmm_localopt_support.py b/libensemble/gen_funcs/aposmm_localopt_support.py index 2de29c8707..21ceb5e0e8 100644 --- a/libensemble/gen_funcs/aposmm_localopt_support.py +++ b/libensemble/gen_funcs/aposmm_localopt_support.py @@ -18,6 +18,7 @@ import numpy as np import psutil +import traceback import libensemble.gen_funcs from libensemble.message_numbers import EVAL_GEN_TAG, STOP_TAG # Only used to simulate receiving from manager @@ -645,8 +646,8 @@ def run_local_tao(user_specs, comm_queue, x0, f0, child_can_read, parent_can_rea def opt_runner(run_local_opt, user_specs, comm_queue, x0, f0, child_can_read, parent_can_read): try: run_local_opt(user_specs, comm_queue, x0, f0, child_can_read, parent_can_read) - except Exception as e: - comm_queue.put(ErrorMsg(e)) + except Exception: + comm_queue.put(ErrorMsg(traceback.format_exc())) parent_can_read.set() @@ -743,7 +744,7 @@ def put_set_wait_get(x, comm_queue, parent_can_read, child_can_read, user_specs) if user_specs.get("periodic"): assert np.allclose(x % 1, values[0] % 1, rtol=1e-15, atol=1e-15), "The point I gave is not the point I got back" else: - assert np.allclose(x, values[0], rtol=1e-15, atol=1e-15), "The point I gave is not the point I got back" + assert np.allclose(x, values[0], rtol=1e-8, atol=1e-8), "The point I gave is not the point I got back" return values diff --git a/libensemble/gen_funcs/persistent_aposmm.py b/libensemble/gen_funcs/persistent_aposmm.py index 685fa4021d..3102bb9fe8 100644 --- a/libensemble/gen_funcs/persistent_aposmm.py +++ b/libensemble/gen_funcs/persistent_aposmm.py @@ -177,12 +177,24 @@ def aposmm(H, persis_info, gen_specs, libE_info): if user_specs["initial_sample_size"] != 0: # Send our initial sample. We don't need to check that n_s is large enough: # the alloc_func only returns when the initial sample has function values. 
- persis_info = add_k_sample_points_to_local_H( - user_specs["initial_sample_size"], user_specs, persis_info, n, comm, local_H, sim_id_to_child_inds - ) - if not user_specs.get("standalone"): + + if not user_specs.get("generate_sample_points", True): # add an extra receive for the sample points + # gonna loop here while the user suggests/ingests sample points until we reach the desired sample size + while n_s < user_specs["initial_sample_size"]: + tag, Work, presumptive_user_sample = ps.recv() + if presumptive_user_sample is not None: + n_s, n_r = update_local_H_after_receiving( + local_H, n, n_s, user_specs, Work, presumptive_user_sample, fields_to_pass, init=True + ) + something_sent = False + else: + persis_info = add_k_sample_points_to_local_H( + user_specs["initial_sample_size"], user_specs, persis_info, n, comm, local_H, sim_id_to_child_inds + ) + something_sent = True + if not user_specs.get("standalone") and user_specs.get("generate_sample_points", True): ps.send(local_H[-user_specs["initial_sample_size"] :][[i[0] for i in gen_specs["out"]]]) - something_sent = True + something_sent = True else: something_sent = False @@ -291,13 +303,17 @@ def aposmm(H, persis_info, gen_specs, libE_info): pass -def update_local_H_after_receiving(local_H, n, n_s, user_specs, Work, calc_in, fields_to_pass): +def update_local_H_after_receiving(local_H, n, n_s, user_specs, Work, calc_in, fields_to_pass, init=False): for name in ["f", "x_on_cube", "grad", "fvec"]: if name in fields_to_pass: assert name in calc_in.dtype.names, ( name + " must be returned to persistent_aposmm for localopt_method: " + user_specs["localopt_method"] ) + if init: + local_H.resize(len(calc_in), refcheck=False) + initialize_dists_and_inds(local_H, len(calc_in)) + for name in calc_in.dtype.names: local_H[name][Work["libE_info"]["H_rows"]] = calc_in[name] diff --git a/libensemble/generators.py b/libensemble/generators.py new file mode 100644 index 0000000000..588c13c906 --- /dev/null +++ b/libensemble/generators.py @@ -0,0 +1,245 @@ +from abc import abstractmethod +from typing import List, Optional + +import numpy as np +from gest_api import Generator +from gest_api.vocs import VOCS +from numpy import typing as npt + +from libensemble.comms.comms import QCommProcess # , QCommThread +from libensemble.executors import Executor +from libensemble.message_numbers import EVAL_GEN_TAG, PERSIS_STOP +from libensemble.tools.tools import add_unique_random_streams +from libensemble.utils.misc import list_dicts_to_np, np_to_list_dicts, unmap_numpy_array + + +class GeneratorNotStartedException(Exception): + """Exception raised by a threaded/multiprocessed generator upon being suggested without having been started""" + + +class LibensembleGenerator(Generator): + """ + Generator interface that accepts the classic History, persis_info, gen_specs, libE_info parameters after VOCS. + + ``suggest/ingest`` methods communicate lists of dictionaries, like the standard. + ``suggest_numpy/ingest_numpy`` methods communicate numpy arrays containing the same data. + + .. note:: + Most LibensembleGenerator instances operate on "x" for variables and "f" for objectives internally. + By default we map "x" to the VOCS variables and "f" to the VOCS objectives, which works for most use cases. + If a given generator iterates internally over multiple, multi-dimensional variables or objectives, + then providing a custom ``variables_mapping`` is recommended. 
+ + For instance: + ``variables_mapping = {"x": ["core", "edge"], + "y": ["mirror-x", "mirror-y"], + "f": ["energy"], + "grad": ["grad_x", "grad_y"]}``. + """ + + def __init__( + self, + VOCS: VOCS, + History: npt.NDArray = [], + persis_info: dict = {}, + gen_specs: dict = {}, + libE_info: dict = {}, + variables_mapping: dict = {}, + **kwargs, + ): + self._validate_vocs(VOCS) + self.VOCS = VOCS + self.History = History + self.gen_specs = gen_specs + self.libE_info = libE_info + + self.variables_mapping = variables_mapping + if not self.variables_mapping: + self.variables_mapping = {} + # Map variables to x if not already mapped + if "x" not in self.variables_mapping: + # SH TODO - is this check needed? + if len(list(self.VOCS.variables.keys())) > 1 or list(self.VOCS.variables.keys())[0] != "x": + self.variables_mapping["x"] = self._get_unmapped_keys(self.VOCS.variables, "x") + # Map objectives to f if not already mapped + if "f" not in self.variables_mapping: + if ( + len(list(self.VOCS.objectives.keys())) > 1 or list(self.VOCS.objectives.keys())[0] != "f" + ): # e.g. {"f": ["f"]} doesn't need mapping + self.variables_mapping["f"] = self._get_unmapped_keys(self.VOCS.objectives, "f") + # Map sim_id to _id + self.variables_mapping["sim_id"] = ["_id"] + + if len(kwargs) > 0: # so user can specify gen-specific parameters as kwargs to constructor + if not self.gen_specs.get("user"): + self.gen_specs["user"] = {} + self.gen_specs["user"].update(kwargs) + if not persis_info.get("rand_stream"): + self.persis_info = add_unique_random_streams({}, 4, seed=4321)[1] + else: + self.persis_info = persis_info + + def _validate_vocs(self, vocs) -> None: + pass + + def _get_unmapped_keys(self, vocs_dict, default_key): + """Get keys from vocs_dict that aren't already mapped to other keys in variables_mapping.""" + # Get all variables that aren't already mapped to other keys + mapped_vars = [] + for mapped_list in self.variables_mapping.values(): + mapped_vars.extend(mapped_list) + unmapped_vars = [v for v in list(vocs_dict.keys()) if v not in mapped_vars] + return unmapped_vars + + @abstractmethod + def suggest_numpy(self, num_points: Optional[int] = 0) -> npt.NDArray: + """Request the next set of points to evaluate, as a NumPy array.""" + + @abstractmethod + def ingest_numpy(self, results: npt.NDArray) -> None: + """Send the results, as a NumPy array, of evaluations to the generator.""" + + @staticmethod + def _convert_np_types(dict_list): + return [ + {key: (value.item() if isinstance(value, np.generic) else value) for key, value in item.items()} + for item in dict_list + ] + + def suggest(self, num_points: Optional[int] = 0) -> List[dict]: + """Request the next set of points to evaluate.""" + return LibensembleGenerator._convert_np_types( + np_to_list_dicts(self.suggest_numpy(num_points), mapping=self.variables_mapping) + ) + + def ingest(self, results: List[dict]) -> None: + """Send the results of evaluations to the generator.""" + self.ingest_numpy(list_dicts_to_np(results, mapping=self.variables_mapping)) + + +class PersistentGenInterfacer(LibensembleGenerator): + """Implement suggest/ingest for traditionally written libEnsemble persistent generator functions. + Still requires a handful of libEnsemble-specific data-structures on initialization. 
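+
+    A typical lifecycle, using a subclass such as APOSMM (sketch; evaluation of
+    the suggested points is user-supplied)::
+
+        gen = APOSMM(vocs, max_active_runs=2, initial_sample_size=10)
+        points = gen.suggest_numpy(10)   # first call starts the background gen_f
+        # ... evaluate points, filling the objective fields ...
+        gen.ingest_numpy(points)
+        gen.finalize()                   # sends PERSIS_STOP and collects the result
+        local_H, persis_info, tag = gen.export()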
+ """ + + def __init__( + self, + VOCS: VOCS, + History: npt.NDArray = [], + persis_info: dict = {}, + gen_specs: dict = {}, + libE_info: dict = {}, + **kwargs, + ) -> None: + super().__init__(VOCS, History, persis_info, gen_specs, libE_info, **kwargs) + self.gen_f = gen_specs["gen_f"] + self.History = History + self.libE_info = libE_info + self._running_gen_f = None + self.gen_result = None + + def setup(self) -> None: + """Must be called once before calling suggest/ingest. Initializes the background thread.""" + if self._running_gen_f is not None: + raise RuntimeError("Generator has already been started.") + # SH this contains the thread lock - removing.... wrong comm to pass on anyway. + if hasattr(Executor.executor, "comm"): + del Executor.executor.comm + self.libE_info["executor"] = Executor.executor + + self._running_gen_f = QCommProcess( + self.gen_f, + None, + self.History, + self.persis_info, + self.gen_specs, + self.libE_info, + user_function=True, + ) + + # This can be set here since the object isnt started until the first suggest + self.libE_info["comm"] = self._running_gen_f.comm + + def _prep_fields(self, results: npt.NDArray) -> npt.NDArray: + """Filter out fields that are not in persis_in and add sim_ended to the dtype""" + filtered_dtype = [ + (name, results.dtype[name]) for name in results.dtype.names if name in self.gen_specs["persis_in"] + ] + + new_dtype = filtered_dtype + [("sim_ended", bool)] + new_results = np.zeros(len(results), dtype=new_dtype) + + for field in new_results.dtype.names: + try: + new_results[field] = results[field] + except ValueError: + continue + + new_results["sim_ended"] = True + return new_results + + def ingest(self, results: List[dict], tag: int = EVAL_GEN_TAG) -> None: + """Send the results of evaluations to the generator.""" + self.ingest_numpy(list_dicts_to_np(results, mapping=self.variables_mapping), tag) + + def suggest_numpy(self, num_points: int = 0) -> npt.NDArray: + """Request the next set of points to evaluate, as a NumPy array.""" + if self._running_gen_f is None: + self.setup() + self._running_gen_f.run() + _, suggest_full = self._running_gen_f.recv() + return suggest_full["calc_out"] + + def ingest_numpy(self, results: npt.NDArray, tag: int = EVAL_GEN_TAG) -> None: + """Send the results of evaluations to the generator, as a NumPy array.""" + if self._running_gen_f is None: + self.setup() + self._running_gen_f.run() + + if results is not None: + results = self._prep_fields(results) + Work = {"libE_info": {"H_rows": np.copy(results["sim_id"]), "persistent": True, "executor": None}} + self._running_gen_f.send(tag, Work) + self._running_gen_f.send(tag, np.copy(results)) + else: + self._running_gen_f.send(tag, None) + + def finalize(self) -> None: + """Stop the generator process and store the returned data.""" + if self._running_gen_f is None: + raise RuntimeError("Generator has not been started.") + self.ingest_numpy(None, PERSIS_STOP) # conversion happens in ingest + self.gen_result = self._running_gen_f.result() + + def export( + self, user_fields: bool = False, as_dicts: bool = False + ) -> tuple[npt.NDArray | list | None, dict | None, int | None]: + """Return the generator's results + Parameters + ---------- + user_fields : bool, optional + If True, return local_H with variables unmapped from arrays back to individual fields. + Default is False. + as_dicts : bool, optional + If True, return local_H as list of dictionaries instead of numpy array. + Default is False. 
+ Returns + ------- + local_H : npt.NDArray | list + Generator history array (unmapped if user_fields=True, as dicts if as_dicts=True). + persis_info : dict + Persistent information. + tag : int + Status flag (e.g., FINISHED_PERSISTENT_GEN_TAG). + """ + if not self.gen_result: + return (None, None, None) + local_H, persis_info, tag = self.gen_result + if user_fields and local_H is not None and self.variables_mapping: + local_H = unmap_numpy_array(local_H, self.variables_mapping) + if as_dicts and local_H is not None: + if user_fields and self.variables_mapping: + local_H = np_to_list_dicts(local_H, self.variables_mapping) + else: + local_H = np_to_list_dicts(local_H) + return (local_H, persis_info, tag) diff --git a/libensemble/libE.py b/libensemble/libE.py index af302d13c8..601d8d009a 100644 --- a/libensemble/libE.py +++ b/libensemble/libE.py @@ -242,6 +242,10 @@ def libE( ] exit_criteria = specs_dump(ensemble.exit_criteria, by_alias=True, exclude_none=True) + # Restore the generator object (don't use serialized version) + if hasattr(ensemble.gen_specs, "generator") and ensemble.gen_specs.generator is not None: + gen_specs["generator"] = ensemble.gen_specs.generator + # Extract platform info from settings or environment platform_info = get_platform(libE_specs) @@ -280,7 +284,7 @@ def manager( logger.info(f"libE version v{__version__}") if "out" in gen_specs and ("sim_id", int) in gen_specs["out"]: - if "libensemble.gen_funcs" not in gen_specs["gen_f"].__module__: + if hasattr(gen_specs["gen_f"], "__module__") and "libensemble.gen_funcs" not in gen_specs["gen_f"].__module__: logger.manager_warning(_USER_SIM_ID_WARNING) try: @@ -458,6 +462,7 @@ def start_proc_team(nworkers, sim_specs, gen_specs, libE_specs, log_comm=True): for wcomm in wcomms: wcomm.run() + return wcomms diff --git a/libensemble/manager.py b/libensemble/manager.py index b12b96a774..97f8f82258 100644 --- a/libensemble/manager.py +++ b/libensemble/manager.py @@ -615,6 +615,7 @@ def _final_receive_and_kill(self, persis_info: dict) -> (dict, int, int): if self.live_data is not None: self.live_data.finalize(self.hist) + persis_info["num_gens_started"] = 0 return persis_info, exit_flag, self.elapsed() def _sim_max_given(self) -> bool: diff --git a/libensemble/sim_funcs/borehole_kills.py b/libensemble/sim_funcs/borehole_kills.py index 54a31256b3..47a00af90a 100644 --- a/libensemble/sim_funcs/borehole_kills.py +++ b/libensemble/sim_funcs/borehole_kills.py @@ -5,7 +5,7 @@ from libensemble.sim_funcs.surmise_test_function import borehole_true -def subproc_borehole(H, delay): +def subproc_borehole(H, delay, poll_manager): """This evaluates the Borehole function using a subprocess running compiled code. 
@@ -15,14 +15,14 @@ def subproc_borehole(H, delay): """ with open("input", "w") as f: - H["thetas"][0].tofile(f) - H["x"][0].tofile(f) + H["thetas"].tofile(f) + H["x"].tofile(f) exctr = Executor.executor args = "input" + " " + str(delay) task = exctr.submit(app_name="borehole", app_args=args, stdout="out.txt", stderr="err.txt") - calc_status = exctr.polling_loop(task, delay=0.01, poll_manager=True) + calc_status = exctr.polling_loop(task, delay=0.01, poll_manager=poll_manager) if calc_status in MAN_KILL_SIGNALS + [TASK_FAILED]: f = np.inf @@ -45,7 +45,7 @@ def borehole(H, persis_info, sim_specs, libE_info): if sim_id > sim_specs["user"]["init_sample_size"]: delay = 2 + np.random.normal(scale=0.5) - f, calc_status = subproc_borehole(H, delay) + f, calc_status = subproc_borehole(H, delay, sim_specs["user"].get("poll_manager", True)) if calc_status in MAN_KILL_SIGNALS and "sim_killed" in H_o.dtype.names: H_o["sim_killed"] = True # For calling script to print only. diff --git a/libensemble/sim_funcs/gest_api_wrapper.py b/libensemble/sim_funcs/gest_api_wrapper.py new file mode 100644 index 0000000000..414c5a5296 --- /dev/null +++ b/libensemble/sim_funcs/gest_api_wrapper.py @@ -0,0 +1,93 @@ +""" +Wrapper for simulation functions in the gest-api format. + +Gest-api functions take an input_dict (single point as dictionary) with +VOCS variables and constants, and return a dict with VOCS objectives, +observables, and constraints. +""" + +import numpy as np + +__all__ = ["gest_api_sim"] + + +def gest_api_sim(H, persis_info, sim_specs, libE_info): + """ + LibEnsemble sim_f wrapper for gest-api format simulation functions. + + Converts between libEnsemble's numpy structured array format and + gest-api's dictionary format for individual points. + + Parameters + ---------- + H : numpy structured array + Input points from libEnsemble containing VOCS variables and constants + persis_info : dict + Persistent information dictionary + sim_specs : dict + Simulation specifications. Must contain: + - "vocs": VOCS object defining variables, constants, objectives, etc. + - "simulator": The gest-api function + libE_info : dict + LibEnsemble information dictionary + + Returns + ------- + H_o : numpy structured array + Output array with VOCS objectives, observables, and constraints + persis_info : dict + Updated persistent information + + Notes + ----- + The gest-api simulator function should have signature: + def simulator(input_dict: dict, **kwargs) -> dict + + Where input_dict contains VOCS variables and constants, + and the return dict contains VOCS objectives, observables, and constraints. + + If the simulator function accepts ``libE_info``, it will be passed. This + allows simulators to access libEnsemble information such as the executor. 
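+
+    A minimal sketch of the expected wiring (the simulator body and the names
+    ``my_simulator``, ``x1``, ``y1`` are illustrative)::
+
+        def my_simulator(input_dict: dict) -> dict:
+            return {"y1": input_dict["x1"] ** 2}
+
+        sim_specs = SimSpecs(simulator=my_simulator, vocs=vocs)
+        # SimSpecs then defaults sim_f to gest_api_sim and derives "in"/"out" from vocs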
+ """ + + simulator = sim_specs["simulator"] + vocs = sim_specs["vocs"] + user_specs = sim_specs.get("user", {}) + + batch = len(H) + H_o = np.zeros(batch, dtype=sim_specs["out"]) + + # Helper to get fields from VOCS (handles both object and dict) + def get_vocs_fields(vocs, attr_names): + fields = [] + is_object = hasattr(vocs, attr_names[0]) + for attr in attr_names: + obj = getattr(vocs, attr, None) if is_object else vocs.get(attr) + if obj: + fields.extend(list(obj.keys())) + return fields + + # Get input fields (variables + constants) and output fields (objectives + observables + constraints) + input_fields = get_vocs_fields(vocs, ["variables", "constants"]) + output_fields = get_vocs_fields(vocs, ["objectives", "observables", "constraints"]) + + # Process each point in the batch + for i in range(batch): + # Build input_dict from H for this point + input_dict = {} + for field in input_fields: + input_dict[field] = H[field][i] + + # Try to pass libE_info, fall back if function doesn't accept it + try: + output_dict = simulator(input_dict, libE_info=libE_info, **user_specs) + except TypeError: + # Function doesn't accept libE_info, call without it + output_dict = simulator(input_dict, **user_specs) + + # Extract outputs from the returned dict + for field in output_fields: + if field in output_dict: + H_o[field][i] = output_dict[field] + + return H_o, persis_info diff --git a/libensemble/specs.py b/libensemble/specs.py index 308491303d..dac1baae4f 100644 --- a/libensemble/specs.py +++ b/libensemble/specs.py @@ -3,7 +3,7 @@ from pathlib import Path import pydantic -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator from libensemble.alloc_funcs.give_sim_work_first import give_sim_work_first @@ -19,6 +19,46 @@ """ +def _get_dtype(field, name: str): + """Get dtype from a VOCS field, handling discrete variables.""" + dtype = getattr(field, "dtype", None) + # For discrete variables, infer dtype from values if not specified + if dtype is None and hasattr(field, "values"): + values = field.values + if values: + # Validate all values are the same type (required for NumPy array) + value_types = {type(v) for v in values} + if len(value_types) > 1: + raise ValueError( + f"Discrete variable '{name}' has mixed types {value_types}. " + "All values must be the same type to be stored in NumPy array." + ) + # Infer dtype from any value (all same type, scalar) + # next(iter(values)) gets an element without creating a list + sample_val = next(iter(values)) + if isinstance(sample_val, str): + max_len = max(len(v) for v in values) + dtype = f"U{max_len}" + else: + dtype = type(sample_val) + return dtype + + +def _convert_dtype_to_output_tuple(name: str, dtype): + """Convert dtype to proper output tuple format for NumPy dtype specification.""" + if dtype is None: + dtype = float + if isinstance(dtype, tuple): + # Check if first element is a type (type, (shape,)) format + if len(dtype) > 1 and (isinstance(dtype[0], type) or isinstance(dtype[0], str)): + return (name, dtype[0], dtype[1]) + else: + # Just shape (shape,) format, default to float + return (name, float, dtype) + else: + return (name, dtype) + + class SimSpecs(BaseModel): """ Specifications for configuring a Simulation Function. @@ -30,6 +70,12 @@ class SimSpecs(BaseModel): produced by a generator function. """ + simulator: object | None = None + """ + A pre-initialized simulator object or callable in gest-api format. + When provided, sim_f defaults to gest_api_sim wrapper. 
+ """ + inputs: list[str] | None = Field(default=[], alias="in") """ list of **field names** out of the complete history to pass @@ -70,6 +116,44 @@ class SimSpecs(BaseModel): the simulator function. """ + vocs: object | None = None + """ + A VOCS object. If provided and inputs/outputs are not explicitly set, + they will be automatically derived from VOCS. + """ + + @model_validator(mode="after") + def set_fields_from_vocs(self): + """Set inputs and outputs from VOCS if vocs is provided and fields are not set.""" + # If simulator is provided but sim_f is not, default to gest_api_sim + if self.simulator is not None and self.sim_f is None: + from libensemble.sim_funcs.gest_api_wrapper import gest_api_sim + + self.sim_f = gest_api_sim + + if self.vocs is None: + return self + + # Set inputs: variables + constants (what the sim receives) + if not self.inputs: + input_fields = [] + for attr in ["variables", "constants"]: + if obj := getattr(self.vocs, attr, None): + input_fields.extend(list(obj.keys())) + self.inputs = input_fields + + # Set outputs: objectives + observables + constraints (what the sim produces) + if not self.outputs: + out_fields = [] + for attr in ["objectives", "observables", "constraints"]: + if obj := getattr(self.vocs, attr, None): + for name, field in obj.items(): + dtype = getattr(field, "dtype", None) + out_fields.append(_convert_dtype_to_output_tuple(name, dtype)) + self.outputs = out_fields + + return self + class GenSpecs(BaseModel): """ @@ -82,6 +166,11 @@ class GenSpecs(BaseModel): simulator function, and makes decisions based on simulator function output. """ + generator: object | None = None + """ + A pre-initialized generator object. + """ + inputs: list[str] | None = Field(default=[], alias="in") """ list of **field names** out of the complete history to pass @@ -109,6 +198,24 @@ class GenSpecs(BaseModel): calling them locally. """ + initial_batch_size: int = 0 + """ + Number of initial points to request that the generator create. If zero, falls back to ``batch_size``. + If both options are zero, defaults to the number of workers. + + Note: Certain generators included with libEnsemble decide + batch sizes via ``gen_specs["user"]`` or other methods. + """ + + batch_size: int = 0 + """ + Number of points to generate in each batch. If zero, falls back to the number of + completed evaluations most recently told to the generator. + + Note: Certain generators included with libEnsemble decide + batch sizes via ``gen_specs["user"]`` or other methods. + """ + threaded: bool | None = False """ Instruct Worker process to launch user function to a thread. @@ -120,6 +227,38 @@ class GenSpecs(BaseModel): customizing the generator function """ + vocs: object | None = None + """ + A VOCS object. If provided and persis_in/outputs are not explicitly set, + they will be automatically derived from VOCS. 
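+
+    For example (a sketch; ``my_gen`` and ``my_vocs`` are placeholders)::
+
+        GenSpecs(generator=my_gen, vocs=my_vocs, batch_size=4)
+
+    This derives ``persis_in`` from all VOCS fields and ``outputs`` from the
+    VOCS variables and constants.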
+    """
+
+    @model_validator(mode="after")
+    def set_fields_from_vocs(self):
+        """Set persis_in and outputs from VOCS if vocs is provided and fields are not set."""
+        if self.vocs is None:
+            return self
+
+        # Set persis_in: ALL VOCS fields (variables + constants + objectives + observables + constraints)
+        if not self.persis_in:
+            persis_in_fields = []
+            for attr in ["variables", "constants", "objectives", "observables", "constraints"]:
+                if obj := getattr(self.vocs, attr, None):
+                    persis_in_fields.extend(list(obj.keys()))
+            self.persis_in = persis_in_fields
+
+        # Set outputs: variables + constants (what the generator produces)
+        if not self.outputs:
+            out_fields = []
+            for attr in ["variables", "constants"]:
+                if obj := getattr(self.vocs, attr, None):
+                    for name, field in obj.items():
+                        dtype = _get_dtype(field, name)
+                        out_fields.append(_convert_dtype_to_output_tuple(name, dtype))
+            self.outputs = out_fields
+
+        return self
+
 
 class AllocSpecs(BaseModel):
     """
diff --git a/libensemble/tests/functionality_tests/test_asktell_sampling.py b/libensemble/tests/functionality_tests/test_asktell_sampling.py
new file mode 100644
index 0000000000..cebb858df2
--- /dev/null
+++ b/libensemble/tests/functionality_tests/test_asktell_sampling.py
@@ -0,0 +1,98 @@
+"""
+Runs libEnsemble with uniform random sampling on a simple 2D problem
+
+Execute via one of the following commands (e.g. 3 workers):
+    mpiexec -np 4 python test_asktell_sampling.py
+    python test_asktell_sampling.py --nworkers 3 --comms local
+    python test_asktell_sampling.py --nworkers 3 --comms tcp
+
+The number of concurrent evaluations of the objective function will be 3 (the generator runs on the manager).
+"""
+
+# Do not change these lines - they are parsed by run-tests.sh
+# TESTSUITE_COMMS: mpi local
+# TESTSUITE_NPROCS: 2 4
+
+import numpy as np
+from gest_api.vocs import VOCS
+
+import libensemble.sim_funcs.six_hump_camel as six_hump_camel
+
+# Import libEnsemble items for this test
+from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
+from libensemble.gen_classes.sampling import UniformSample
+from libensemble.libE import libE
+from libensemble.sim_funcs.executor_hworld import executor_hworld as sim_f_exec
+from libensemble.tools import add_unique_random_streams, parse_args
+
+
+def sim_f(In):
+    Out = np.zeros(1, dtype=[("f", float)])
+    Out["f"] = np.linalg.norm(In)
+    return Out
+
+
+if __name__ == "__main__":
+    nworkers, is_manager, libE_specs, _ = parse_args()
+    libE_specs["gen_on_manager"] = True
+
+    sim_specs = {
+        "sim_f": sim_f,
+        "in": ["x"],
+        "out": [("f", float)],
+    }
+
+    gen_specs = {
+        "persis_in": ["x", "f", "sim_id"],
+        "out": [("x", float, (2,))],
+        "initial_batch_size": 20,
+        "batch_size": 10,
+        "user": {
+            "initial_batch_size": 20,  # for wrapper
+            "lb": np.array([-3, -2]),
+            "ub": np.array([3, 2]),
+        },
+    }
+
+    variables = {"x0": [-3, 3], "x1": [-2, 2]}
+    objectives = {"energy": "EXPLORE"}
+
+    vocs = VOCS(variables=variables, objectives=objectives)
+
+    alloc_specs = {"alloc_f": alloc_f}
+    exit_criteria = {"gen_max": 201}
+    persis_info = add_unique_random_streams({}, nworkers + 1, seed=1234)
+
+    for test in range(3):
+        if test == 0:
+            generator = UniformSample(vocs)
+
+        elif test == 1:
+            persis_info["num_gens_started"] = 0
+            generator = UniformSample(vocs, variables_mapping={"x": ["x0", "x1"], "f": ["energy"]})
+
+        elif test == 2:
+            from libensemble.executors.mpi_executor import MPIExecutor
+
+            persis_info["num_gens_started"] = 0
+            generator = UniformSample(vocs, variables_mapping={"x": ["x0", "x1"], "f": ["energy"]})
+            sim_app2 = six_hump_camel.__file__
+
+            executor = MPIExecutor()
+            executor.register_app(full_path=sim_app2, app_name="six_hump_camel", calc_type="sim")  # Named app
+
+            sim_specs = {
+                "sim_f": sim_f_exec,
+                "in": ["x"],
+                "out": [("f", float), ("cstat", int)],
+                "user": {"cores": 1},
+            }
+
+        gen_specs["generator"] = generator
+        H, persis_info, flag = libE(
+            sim_specs, gen_specs, exit_criteria, persis_info, alloc_specs, libE_specs=libE_specs
+        )
+
+        if is_manager:
+            print(H[["sim_id", "x", "f"]][:10])
+            assert len(H) >= 201, f"H has length {len(H)}"
diff --git a/libensemble/tests/functionality_tests/test_asktell_sampling_external_gen.py b/libensemble/tests/functionality_tests/test_asktell_sampling_external_gen.py
new file mode 100644
index 0000000000..afc2da8b52
--- /dev/null
+++ b/libensemble/tests/functionality_tests/test_asktell_sampling_external_gen.py
@@ -0,0 +1,95 @@
+"""
+Runs libEnsemble with uniform random sampling on a simple 2D problem
+
+This test uses external gest-api compatible generators.
+
+Execute via one of the following commands (e.g. 3 workers):
+    mpiexec -np 4 python test_asktell_sampling_external_gen.py
+    python test_asktell_sampling_external_gen.py --nworkers 3 --comms local
+    python test_asktell_sampling_external_gen.py --nworkers 3 --comms tcp
+
+The number of concurrent evaluations of the objective function will be 3.
+"""
+
+# Do not change these lines - they are parsed by run-tests.sh
+# TESTSUITE_COMMS: mpi local
+# TESTSUITE_NPROCS: 2 4
+
+import numpy as np
+from gest_api.vocs import VOCS
+
+# from gest_api.vocs import ContinuousVariable
+
+# Import libEnsemble items for this test
+from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f
+
+# from libensemble.gen_classes.external.sampling import UniformSampleArray
+from libensemble.gen_classes.external.sampling import UniformSample
+from libensemble import Ensemble
+from libensemble.specs import GenSpecs, SimSpecs, AllocSpecs, ExitCriteria, LibeSpecs
+
+
+def sim_f_array(In):
+    Out = np.zeros(1, dtype=[("f", float)])
+    Out["f"] = np.linalg.norm(In)
+    return Out
+
+
+def sim_f_scalar(In):
+    Out = np.zeros(1, dtype=[("f", float)])
+    Out["f"] = np.linalg.norm([In["x0"], In["x1"]])  # norm of the (x0, x1) point
+    return Out
+
+
+if __name__ == "__main__":
+
+    libE_specs = LibeSpecs(gen_on_manager=True)
+
+    for test in range(1):  # 2
+
+        objectives = {"f": "EXPLORE"}
+
+        if test == 0:
+            sim_f = sim_f_scalar
+            variables = {"x0": [-3, 3], "x1": [-2, 2]}
+            vocs = VOCS(variables=variables, objectives=objectives)
+            generator = UniformSample(vocs)
+
+        # Requires gest-api variables array bounds update
+        # elif test == 1:
+        #     sim_f = sim_f_array
+        #     variables = {"x": ContinuousVariable(dtype=(float, (2,)), domain=[[-3, 3], [-2, 2]])}
+        #     vocs = VOCS(variables=variables, objectives=objectives)
+        #     generator = UniformSampleArray(vocs)
+
+        sim_specs = SimSpecs(
+            sim_f=sim_f,
+            vocs=vocs,
+        )
+
+        gen_specs = GenSpecs(
+            generator=generator,
+            initial_batch_size=20,
+            batch_size=10,
+            vocs=vocs,
+        )
+
+        alloc_specs = AllocSpecs(alloc_f=alloc_f)
+        exit_criteria = ExitCriteria(gen_max=201)
+
+        ensemble = Ensemble(
+            parse_args=True,
+            sim_specs=sim_specs,
+            gen_specs=gen_specs,
+            exit_criteria=exit_criteria,
+            alloc_specs=alloc_specs,
+            libE_specs=libE_specs,
+        )
+
+        ensemble.add_random_streams()
+        ensemble.run()
+
+        if ensemble.is_manager:
+            print(ensemble.H[["sim_id", "x0", "x1", "f"]][:10])
+            # print(ensemble.H[["sim_id", "x", "f"]][:10])  # For array variables
+            assert len(ensemble.H) >= 201, f"H has length 
{len(ensemble.H)}" diff --git a/libensemble/tests/functionality_tests/test_persistent_uniform_gen_decides_stop.py b/libensemble/tests/functionality_tests/test_persistent_uniform_gen_decides_stop.py index 68c8aaaa05..d9b9465080 100644 --- a/libensemble/tests/functionality_tests/test_persistent_uniform_gen_decides_stop.py +++ b/libensemble/tests/functionality_tests/test_persistent_uniform_gen_decides_stop.py @@ -82,9 +82,7 @@ assert ( sum(counts == init_batch_size) >= ngens ), "The initial batch of each gen should be common among initial_batch_size number of points" - assert ( - len(counts) > 1 - ), "All gen_ended_times are the same; they should be different for the async case" + assert len(counts) > 1, "All gen_ended_times are the same; they should be different for the async case" gen_workers = np.unique(H["gen_worker"]) print("Generators that issued points", gen_workers) diff --git a/libensemble/tests/regression_tests/test_asktell_aposmm_nlopt.py b/libensemble/tests/regression_tests/test_asktell_aposmm_nlopt.py new file mode 100644 index 0000000000..83e3bf6253 --- /dev/null +++ b/libensemble/tests/regression_tests/test_asktell_aposmm_nlopt.py @@ -0,0 +1,114 @@ +""" +Runs libEnsemble with APOSMM with the NLopt local optimizer. + +Execute via one of the following commands (e.g. 3 workers): + mpiexec -np 4 python test_persistent_aposmm_nlopt.py + python test_persistent_aposmm_nlopt.py --nworkers 3 --comms local + python test_persistent_aposmm_nlopt.py --nworkers 3 --comms tcp + +When running with the above commands, the number of concurrent evaluations of +the objective function will be 2, as one of the three workers will be the +persistent generator. +""" + +# Do not change these lines - they are parsed by run-tests.sh +# TESTSUITE_COMMS: local mpi tcp +# TESTSUITE_NPROCS: 3 + +import sys +from math import gamma, pi, sqrt + +import numpy as np + +import libensemble.gen_funcs +from libensemble.executors.mpi_executor import MPIExecutor +from libensemble.sim_funcs import six_hump_camel +from libensemble.sim_funcs.executor_hworld import executor_hworld as sim_f_exec + +# Import libEnsemble items for this test +from libensemble.sim_funcs.six_hump_camel import six_hump_camel as sim_f + +libensemble.gen_funcs.rc.aposmm_optimizers = "nlopt" +from time import time + +from gest_api.vocs import VOCS + +from libensemble import Ensemble +from libensemble.alloc_funcs.persistent_aposmm_alloc import persistent_aposmm_alloc as alloc_f +from libensemble.gen_classes import APOSMM +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, SimSpecs +from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima + +# Main block is necessary only when using local comms with spawn start method (default on macOS and Windows). +if __name__ == "__main__": + + for run in range(2): + + workflow = Ensemble(parse_args=True) + + if workflow.is_manager: + start_time = time() + + if workflow.nworkers < 2: + sys.exit("Cannot run with a persistent worker if only one worker -- aborting...") + + n = 2 + workflow.alloc_specs = AllocSpecs(alloc_f=alloc_f) + + vocs = VOCS( + variables={"core": [-3, 3], "edge": [-2, 2], "core_on_cube": [-3, 3], "edge_on_cube": [-2, 2]}, + objectives={"energy": "MINIMIZE"}, + ) + + workflow.libE_specs.gen_on_manager = True + + aposmm = APOSMM( + vocs, + max_active_runs=workflow.nworkers, # should this match nworkers always? practically? 
+ variables_mapping={"x": ["core", "edge"], "x_on_cube": ["core_on_cube", "edge_on_cube"], "f": ["energy"]}, + initial_sample_size=100, + sample_points=minima, + localopt_method="LN_BOBYQA", + rk_const=0.5 * ((gamma(1 + (n / 2)) * 5) ** (1 / n)) / sqrt(pi), + xtol_abs=1e-6, + ftol_abs=1e-6, + ) + + # SH TODO - dont want this stuff duplicated - pass with vocs instead + workflow.gen_specs = GenSpecs( + persis_in=["x", "x_on_cube", "sim_id", "local_min", "local_pt", "f"], + generator=aposmm, + batch_size=5, + initial_batch_size=10, + user={"initial_sample_size": 100}, + ) + + if run == 0: + workflow.sim_specs = SimSpecs(sim_f=sim_f, inputs=["x"], outputs=[("f", float)]) + workflow.exit_criteria = ExitCriteria(sim_max=2000) + elif run == 1: + workflow.persis_info["num_gens_started"] = 0 + sim_app2 = six_hump_camel.__file__ + exctr = MPIExecutor() + exctr.register_app(full_path=sim_app2, app_name="six_hump_camel", calc_type="sim") # Named app + workflow.sim_specs = SimSpecs( + sim_f=sim_f_exec, inputs=["x"], outputs=[("f", float), ("cstat", int)], user={"cores": 1} + ) + workflow.exit_criteria = ExitCriteria(sim_max=200) + + workflow.add_random_streams() + + H, _, _ = workflow.run() + + # Perform the run + + if workflow.is_manager and run == 0: + print("[Manager]:", H[np.where(H["local_min"])]["x"]) + print("[Manager]: Time taken =", time() - start_time, flush=True) + + tol = 1e-5 + for m in minima: + # The minima are known on this test problem. + # We use their values to test APOSMM has identified all minima + print(np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)), flush=True) + assert np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)) < tol diff --git a/libensemble/tests/regression_tests/test_asktell_gpCAM.py b/libensemble/tests/regression_tests/test_asktell_gpCAM.py new file mode 100644 index 0000000000..b093a0df7a --- /dev/null +++ b/libensemble/tests/regression_tests/test_asktell_gpCAM.py @@ -0,0 +1,96 @@ +""" +Tests libEnsemble with gpCAM + +Execute via one of the following commands (e.g. 3 workers): + mpiexec -np 4 python test_gpCAM_class.py + python test_gpCAM_class.py --nworkers 3 --comms local + +When running with the above commands, the number of concurrent evaluations of +the objective function will be 2, as one of the three workers will be the +persistent generator. + +See libensemble.gen_funcs.persistent_gpCAM for more details about the generator +setup. +""" + +# Do not change these lines - they are parsed by run-tests.sh +# TESTSUITE_COMMS: mpi local +# TESTSUITE_NPROCS: 4 +# TESTSUITE_EXTRA: true + +import sys +import warnings + +import numpy as np +from gest_api.vocs import VOCS + +from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f +from libensemble.gen_classes.gpCAM import GP_CAM, GP_CAM_Covar + +# Import libEnsemble items for this test +from libensemble.libE import libE +from libensemble.sim_funcs.rosenbrock import rosenbrock_eval as sim_f +from libensemble.tools import parse_args, save_libE_output + +warnings.filterwarnings("ignore", message="Default hyperparameter_bounds") + + +# Main block is necessary only when using local comms with spawn start method (default on macOS and Windows). 
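+# The loop below runs three generator configurations in turn (an overview of
+# the instances that follow):
+#   inst 0: GP_CAM_Covar without a grid, saving the history every 150 gens.
+#   inst 1: GP_CAM_Covar with use_grid=True, reading test points from the file
+#           saved by inst 0.
+#   inst 2: GP_CAM with ask_max_iter=1 (kept short as this form can be slow).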
+if __name__ == "__main__": + nworkers, is_manager, libE_specs, _ = parse_args() + + if nworkers < 2: + sys.exit("Cannot run with a persistent worker if only one worker -- aborting...") + + n = 4 + batch_size = 15 + + sim_specs = { + "sim_f": sim_f, + "in": ["x"], + "out": [ + ("f", float), + ], + } + + gen_specs = { + "persis_in": ["x", "f", "sim_id"], + "out": [("x", float, (n,))], + "batch_size": batch_size, + "user": { + "lb": np.array([-3, -2, -1, -1]), + "ub": np.array([3, 2, 1, 1]), + }, + } + + vocs = VOCS(variables={"x0": [-3, 3], "x1": [-2, 2], "x2": [-1, 1], "x3": [-1, 1]}, objectives={"f": "MINIMIZE"}) + + alloc_specs = {"alloc_f": alloc_f} + + gen = GP_CAM_Covar(vocs) + + for inst in range(3): + if inst == 0: + gen_specs["generator"] = gen + num_batches = 10 + exit_criteria = {"sim_max": num_batches * batch_size, "wallclock_max": 300} + libE_specs["save_every_k_gens"] = 150 + libE_specs["H_file_prefix"] = "gpCAM_nongrid" + if inst == 1: + gen = GP_CAM_Covar(vocs, use_grid=True, test_points_file="gpCAM_nongrid_after_gen_150.npy") + gen_specs["generator"] = gen + libE_specs["final_gen_send"] = True + del libE_specs["H_file_prefix"] + del libE_specs["save_every_k_gens"] + elif inst == 2: + gen = GP_CAM(vocs, ask_max_iter=1) + gen_specs["generator"] = gen + num_batches = 3 # Few because the ask_tell gen can be slow + exit_criteria = {"sim_max": num_batches * batch_size, "wallclock_max": 300} + + # Perform the run + H, persis_info, flag = libE(sim_specs, gen_specs, exit_criteria, {}, alloc_specs, libE_specs) + if is_manager: + assert len(np.unique(H["gen_ended_time"])) == num_batches + + save_libE_output(H, persis_info, __file__, nworkers) diff --git a/libensemble/tests/regression_tests/test_optimas_grid_sample.py b/libensemble/tests/regression_tests/test_optimas_grid_sample.py new file mode 100644 index 0000000000..57c6c8fedf --- /dev/null +++ b/libensemble/tests/regression_tests/test_optimas_grid_sample.py @@ -0,0 +1,109 @@ +""" +Tests libEnsemble with Optimas GridSamplingGenerator + +*****currently fixing nworkers to batch_size***** + +From Optimas test test_grid_sampling.py + +Execute via one of the following commands (e.g. 4 workers): + mpiexec -np 5 python test_optimas_grid_sample.py + python test_optimas_grid_sample.py -n 4 + +When running with the above commands, the number of concurrent evaluations of +the objective function will be 4 as the generator is on the manager. + +""" + +# Do not change these lines - they are parsed by run-tests.sh +# TESTSUITE_COMMS: mpi local +# TESTSUITE_NPROCS: 4 +# TESTSUITE_EXTRA: true + +import numpy as np +from gest_api.vocs import VOCS +from optimas.generators import GridSamplingGenerator + +from libensemble import Ensemble +from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + +def eval_func(input_params: dict): + """Evaluation function for single-fidelity test""" + x0 = input_params["x0"] + x1 = input_params["x1"] + result = -(x0 + 10 * np.cos(x0)) * (x1 + 5 * np.cos(x1)) + return {"f": result} + + +# Main block is necessary only when using local comms with spawn start method (default on macOS and Windows). +if __name__ == "__main__": + + n = 2 + batch_size = 4 + + libE_specs = LibeSpecs(gen_on_manager=True, nworkers=batch_size) + + # Create varying parameters. + lower_bounds = [-3.0, 2.0] + upper_bounds = [1.0, 5.0] + n_steps = [7, 15] + + # Set number of evaluations. 
+ n_evals = np.prod(n_steps) + + vocs = VOCS( + variables={ + "x0": [lower_bounds[0], upper_bounds[0]], + "x1": [lower_bounds[1], upper_bounds[1]], + }, + objectives={"f": "MAXIMIZE"}, + ) + + gen = GridSamplingGenerator(vocs=vocs, n_steps=n_steps) + + gen_specs = GenSpecs( + generator=gen, + batch_size=batch_size, + vocs=vocs, + ) + + sim_specs = SimSpecs( + simulator=eval_func, + vocs=vocs, + ) + + alloc_specs = AllocSpecs(alloc_f=alloc_f) + exit_criteria = ExitCriteria(sim_max=n_evals) + + workflow = Ensemble( + libE_specs=libE_specs, + sim_specs=sim_specs, + alloc_specs=alloc_specs, + gen_specs=gen_specs, + exit_criteria=exit_criteria, + ) + + H, _, _ = workflow.run() + + # Perform the run + if workflow.is_manager: + print(f"Completed {len(H)} simulations") + + # Get generated points. + h = H[H["sim_ended"]] + x0_gen = h["x0"] + x1_gen = h["x1"] + + # Get expected 1D steps along each variable. + x0_steps = np.linspace(lower_bounds[0], upper_bounds[0], n_steps[0]) + x1_steps = np.linspace(lower_bounds[1], upper_bounds[1], n_steps[1]) + + # Check that the scan along each variable is as expected. + np.testing.assert_array_equal(np.unique(x0_gen), x0_steps) + np.testing.assert_array_equal(np.unique(x1_gen), x1_steps) + + # Check that for every x0 step, the expected x1 steps are performed. + for x0_step in x0_steps: + x1_in_x0_step = x1_gen[x0_gen == x0_step] + np.testing.assert_array_equal(x1_in_x0_step, x1_steps) diff --git a/libensemble/tests/regression_tests/test_persistent_aposmm_nlopt.py b/libensemble/tests/regression_tests/test_persistent_aposmm_nlopt.py index b43a249b3f..9d42784439 100644 --- a/libensemble/tests/regression_tests/test_persistent_aposmm_nlopt.py +++ b/libensemble/tests/regression_tests/test_persistent_aposmm_nlopt.py @@ -80,7 +80,7 @@ alloc_specs = {"alloc_f": alloc_f} - persis_info = add_unique_random_streams({}, nworkers + 1) + persis_info = add_unique_random_streams({}, nworkers + 1, seed=4321) exit_criteria = {"sim_max": 2000} diff --git a/libensemble/tests/regression_tests/test_xopt_EI.py b/libensemble/tests/regression_tests/test_xopt_EI.py new file mode 100644 index 0000000000..bf114b38fe --- /dev/null +++ b/libensemble/tests/regression_tests/test_xopt_EI.py @@ -0,0 +1,103 @@ +""" +Tests libEnsemble with Xopt ExpectedImprovementGenerator + +*****currently fixing nworkers to batch_size***** + +Execute via one of the following commands (e.g. 4 workers): + mpiexec -np 5 python test_xopt_EI.py + python test_xopt_EI.py -n 4 + +When running with the above commands, the number of concurrent evaluations of +the objective function will be 4 as the generator is on the manager. + +""" + +# Do not change these lines - they are parsed by run-tests.sh +# TESTSUITE_COMMS: mpi local +# TESTSUITE_NPROCS: 4 +# TESTSUITE_EXTRA: true + +import numpy as np +from gest_api.vocs import VOCS +from xopt.generators.bayesian.expected_improvement import ExpectedImprovementGenerator + +from libensemble import Ensemble +from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + +# Adapted from Xopt/xopt/resources/testing.py +def xtest_sim(H, persis_info, sim_specs, _): + """ + Simple sim function that takes x1, x2, constant1 from H and returns y1, c1. 
+ Logic: y1 = x2, c1 = x1 + """ + batch = len(H) + H_o = np.zeros(batch, dtype=sim_specs["out"]) + + for i in range(batch): + x1 = H["x1"][i] + x2 = H["x2"][i] + # constant1 is available but not used in the calculation + + H_o["y1"][i] = x2 + H_o["c1"][i] = x1 + + return H_o, persis_info + + +# Main block is necessary only when using local comms with spawn start method (default on macOS and Windows). +if __name__ == "__main__": + + n = 2 + batch_size = 4 + + libE_specs = LibeSpecs(gen_on_manager=True, nworkers=batch_size) + + vocs = VOCS( + variables={"x1": [0, 1.0], "x2": [0, 10.0]}, + objectives={"y1": "MINIMIZE"}, + constraints={"c1": ["GREATER_THAN", 0.5]}, + constants={"constant1": 1.0}, + ) + + gen = ExpectedImprovementGenerator(vocs=vocs) + + # Create 4 initial points and ingest them + initial_points = [ + {"x1": 0.2, "x2": 2.0, "constant1": 1.0, "y1": 2.0, "c1": 0.2}, + {"x1": 0.5, "x2": 5.0, "constant1": 1.0, "y1": 5.0, "c1": 0.5}, + {"x1": 0.7, "x2": 7.0, "constant1": 1.0, "y1": 7.0, "c1": 0.7}, + {"x1": 0.9, "x2": 9.0, "constant1": 1.0, "y1": 9.0, "c1": 0.9}, + ] + gen.ingest(initial_points) + + gen_specs = GenSpecs( + generator=gen, + batch_size=batch_size, + vocs=vocs, + ) + + sim_specs = SimSpecs( + sim_f=xtest_sim, + vocs=vocs, + ) + + alloc_specs = AllocSpecs(alloc_f=alloc_f) + exit_criteria = ExitCriteria(sim_max=20) + + workflow = Ensemble( + libE_specs=libE_specs, + sim_specs=sim_specs, + alloc_specs=alloc_specs, + gen_specs=gen_specs, + exit_criteria=exit_criteria, + ) + + H, _, _ = workflow.run() + + # Perform the run + if workflow.is_manager: + print(f"Completed {len(H)} simulations") + assert np.array_equal(H["y1"], H["x2"]) + assert np.array_equal(H["c1"], H["x1"]) diff --git a/libensemble/tests/regression_tests/test_xopt_EI_xopt_sim.py b/libensemble/tests/regression_tests/test_xopt_EI_xopt_sim.py new file mode 100644 index 0000000000..f2ff1453cb --- /dev/null +++ b/libensemble/tests/regression_tests/test_xopt_EI_xopt_sim.py @@ -0,0 +1,97 @@ +""" +Tests libEnsemble with Xopt ExpectedImprovementGenerator and a gest-api form simulator. + +*****currently fixing nworkers to batch_size***** + +Execute via one of the following commands (e.g. 4 workers): + mpiexec -np 5 python test_xopt_EI_xopt_sim.py + python test_xopt_EI_xopt_sim.py -n 4 + +When running with the above commands, the number of concurrent evaluations of +the objective function will be 4 as the generator is on the manager. + +""" + +# Do not change these lines - they are parsed by run-tests.sh +# TESTSUITE_COMMS: mpi local +# TESTSUITE_NPROCS: 4 +# TESTSUITE_EXTRA: true + +import numpy as np +from gest_api.vocs import VOCS +from xopt.generators.bayesian.expected_improvement import ExpectedImprovementGenerator + +from libensemble import Ensemble +from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + +# From Xopt/xopt/resources/testing.py +def xtest_callable(input_dict: dict, a=0) -> dict: + """Single-objective callable test function""" + assert isinstance(input_dict, dict) + x1 = input_dict["x1"] + x2 = input_dict["x2"] + + assert "constant1" in input_dict + + y1 = x2 + c1 = x1 + return {"y1": y1, "c1": c1} + + +# Main block is necessary only when using local comms with spawn start method (default on macOS and Windows). 
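+# Since SimSpecs below is given ``simulator=`` (rather than ``sim_f=``), libEnsemble
+# defaults sim_f to the gest_api_sim wrapper, which builds an input_dict per point
+# from the VOCS variables/constants and maps the returned dict back onto H.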
+if __name__ == "__main__": + + n = 2 + batch_size = 4 + + libE_specs = LibeSpecs(gen_on_manager=True, nworkers=batch_size) + + vocs = VOCS( + variables={"x1": [0, 1.0], "x2": [0, 10.0]}, + objectives={"y1": "MINIMIZE"}, + constraints={"c1": ["GREATER_THAN", 0.5]}, + constants={"constant1": 1.0}, + ) + + gen = ExpectedImprovementGenerator(vocs=vocs) + + # Create 4 initial points and ingest them + initial_points = [ + {"x1": 0.2, "x2": 2.0, "constant1": 1.0, "y1": 2.0, "c1": 0.2}, + {"x1": 0.5, "x2": 5.0, "constant1": 1.0, "y1": 5.0, "c1": 0.5}, + {"x1": 0.7, "x2": 7.0, "constant1": 1.0, "y1": 7.0, "c1": 0.7}, + {"x1": 0.9, "x2": 9.0, "constant1": 1.0, "y1": 9.0, "c1": 0.9}, + ] + gen.ingest(initial_points) + + gen_specs = GenSpecs( + generator=gen, + batch_size=batch_size, + vocs=vocs, + ) + + sim_specs = SimSpecs( + simulator=xtest_callable, + vocs=vocs, + ) + + alloc_specs = AllocSpecs(alloc_f=alloc_f) + exit_criteria = ExitCriteria(sim_max=20) + + workflow = Ensemble( + libE_specs=libE_specs, + sim_specs=sim_specs, + alloc_specs=alloc_specs, + gen_specs=gen_specs, + exit_criteria=exit_criteria, + ) + + H, _, _ = workflow.run() + + # Perform the run + if workflow.is_manager: + print(f"Completed {len(H)} simulations") + assert np.array_equal(H["y1"], H["x2"]) + assert np.array_equal(H["c1"], H["x1"]) diff --git a/libensemble/tests/regression_tests/test_xopt_nelder_mead.py b/libensemble/tests/regression_tests/test_xopt_nelder_mead.py new file mode 100644 index 0000000000..56e0daadad --- /dev/null +++ b/libensemble/tests/regression_tests/test_xopt_nelder_mead.py @@ -0,0 +1,86 @@ +""" +Tests libEnsemble with Xopt NelderMeadGenerator using Rosenbrock function + +Execute via one of the following commands (e.g. 4 workers): + mpiexec -np 5 python test_xopt_nelder_mead.py + python test_xopt_nelder_mead.py -n 4 + +When running with the above commands, the number of concurrent evaluations of +the objective function will be 4 as the generator is on the manager. + +""" + +# Do not change these lines - they are parsed by run-tests.sh +# TESTSUITE_COMMS: mpi local +# TESTSUITE_NPROCS: 2 +# TESTSUITE_EXTRA: true + +import numpy as np +from gest_api.vocs import VOCS +from xopt.generators.sequential.neldermead import NelderMeadGenerator + +from libensemble import Ensemble +from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + + +def rosenbrock_callable(input_dict: dict) -> dict: + """2D Rosenbrock function for gest-api style simulator""" + x1 = input_dict["x1"] + x2 = input_dict["x2"] + y1 = 100 * (x2 - x1**2) ** 2 + (1 - x1) ** 2 + return {"y1": y1} + + +# Main block is necessary only when using local comms with spawn start method (default on macOS and Windows). 
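+# NelderMeadGenerator is a sequential (one-point-at-a-time) generator, so the
+# batch size below is fixed at 1.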
+if __name__ == "__main__": + + batch_size = 1 + + libE_specs = LibeSpecs(gen_on_manager=True, nworkers=batch_size) + + vocs = VOCS( + variables={"x1": [-2.0, 2.0], "x2": [-2.0, 2.0]}, + objectives={"y1": "MINIMIZE"}, + ) + + gen = NelderMeadGenerator(vocs=vocs) + + # Create initial points with evaluated rosenbrock values + initial_points = [ + {"x1": -1.2, "x2": 1.0, "y1": rosenbrock_callable({"x1": -1.2, "x2": 1.0})["y1"]}, + {"x1": -1.0, "x2": 1.0, "y1": rosenbrock_callable({"x1": -1.0, "x2": 1.0})["y1"]}, + {"x1": -0.8, "x2": 0.8, "y1": rosenbrock_callable({"x1": -0.8, "x2": 0.8})["y1"]}, + ] + gen.ingest(initial_points) + + gen_specs = GenSpecs( + generator=gen, + batch_size=batch_size, + vocs=vocs, + ) + + sim_specs = SimSpecs( + simulator=rosenbrock_callable, + vocs=vocs, + ) + + alloc_specs = AllocSpecs(alloc_f=alloc_f) + exit_criteria = ExitCriteria(sim_max=30) + + workflow = Ensemble( + libE_specs=libE_specs, + sim_specs=sim_specs, + alloc_specs=alloc_specs, + gen_specs=gen_specs, + exit_criteria=exit_criteria, + ) + + H, _, _ = workflow.run() + + # Perform the run + if workflow.is_manager: + print(f"Completed {len(H)} simulations") + initial_value = H["y1"][0] + best_value = H["y1"][np.argmin(H["y1"])] + assert best_value <= initial_value diff --git a/libensemble/tests/run_tests.py b/libensemble/tests/run_tests.py index 073ab1a541..a26ab7c5d9 100755 --- a/libensemble/tests/run_tests.py +++ b/libensemble/tests/run_tests.py @@ -26,7 +26,7 @@ COV_REPORT = True # Regression test options -REG_TEST_LIST = "test_*.py" +REG_TEST_LIST = "test_xopt_*.py test_optimas_*.py" REG_TEST_OUTPUT_EXT = "std.out" REG_STOP_ON_FAILURE = False REG_LIST_TESTS_ONLY = False # just shows all tests to be run. @@ -367,7 +367,8 @@ def run_regression_tests(root_dir, python_exec, args, current_os): reg_test_files = [] for dir_path in test_dirs: full_path = os.path.join(root_dir, dir_path) - reg_test_files.extend(glob.glob(os.path.join(full_path, reg_test_list))) + for pattern in reg_test_list.split(): + reg_test_files.extend(glob.glob(os.path.join(full_path, pattern))) reg_test_files = sorted(reg_test_files) reg_pass = 0 diff --git a/libensemble/tests/scaling_tests/forces/forces_simple_xopt/cleanup.sh b/libensemble/tests/scaling_tests/forces/forces_simple_xopt/cleanup.sh new file mode 100755 index 0000000000..eaaa23635a --- /dev/null +++ b/libensemble/tests/scaling_tests/forces/forces_simple_xopt/cleanup.sh @@ -0,0 +1 @@ +rm -r ensemble *.npy *.pickle ensemble.log lib*.txt diff --git a/libensemble/tests/scaling_tests/forces/forces_simple_xopt/forces_simf.py b/libensemble/tests/scaling_tests/forces/forces_simple_xopt/forces_simf.py new file mode 100644 index 0000000000..3f8c2a3684 --- /dev/null +++ b/libensemble/tests/scaling_tests/forces/forces_simple_xopt/forces_simf.py @@ -0,0 +1,106 @@ +""" +Module containing alternative functions for running the forces MPI application + +run_forces: Uses classic libEnsemble sim_f. +run_forces_dict: Uses gest-api/xopt style simulator. +""" + +import numpy as np + +# Optional status codes to display in libE_stats.txt for each gen or sim +from libensemble.message_numbers import TASK_FAILED, WORKER_DONE + +__all__ = [ + "run_forces", + "run_forces_dict", +] + + +def run_forces(H, persis_info, sim_specs, libE_info): + """Runs the forces MPI application. + + By default assigns the number of MPI ranks to the number + of cores available to this worker. + + To assign a different number give e.g., `num_procs=4` to + ``exctr.submit``. 
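+
+    For example (illustrative)::
+
+        task = exctr.submit(app_name="forces", app_args=args, num_procs=4)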
+ """ + + calc_status = 0 + + # Parse out num particles, from generator function + particles = str(int(H["x"][0])) # x is a scalar for each point + + # app arguments: num particles, timesteps, also using num particles as seed + args = particles + " " + str(10) + " " + particles + + # Retrieve our MPI Executor + exctr = libE_info["executor"] + + # Submit our forces app for execution. + task = exctr.submit(app_name="forces", app_args=args) + + # Block until the task finishes + task.wait() + + # Try loading final energy reading, set the sim's status + statfile = "forces.stat" + try: + data = np.loadtxt(statfile) + final_energy = data[-1] + calc_status = WORKER_DONE + except Exception: + final_energy = np.nan + calc_status = TASK_FAILED + + # Define our output array, populate with energy reading + output = np.zeros(1, dtype=sim_specs["out"]) + output["energy"] = final_energy + + # Return final information to worker, for reporting to manager + return output, persis_info, calc_status + + +def run_forces_dict(input_dict: dict, libE_info: dict) -> dict: + """Runs the forces MPI application (gest-api/xopt style simulator). + + Parameters + ---------- + input_dict : dict + Input dictionary containing VOCS variables. Must contain "x" key + with the number of particles. + libE_info : dict, optional + LibEnsemble information dictionary containing executor and other info. + + Returns + ------- + dict + Output dictionary containing "energy" key with the final energy value. + """ + assert "executor" in libE_info, "executor must be available in libE_info" + + # Extract executor from libE_info + executor = libE_info["executor"] + + # Parse out num particles from input dictionary + x = input_dict["x"] + particles = str(int(x)) + + # app arguments: num particles, timesteps, also using num particles as seed + args = particles + " " + str(10) + " " + particles + + # Submit our forces app for execution. + task = executor.submit(app_name="forces", app_args=args) + + # Block until the task finishes + task.wait() + + # Try loading final energy reading + statfile = "forces.stat" + try: + data = np.loadtxt(statfile) + final_energy = float(data[-1]) + except Exception: + final_energy = np.nan + + return {"energy": final_energy} diff --git a/libensemble/tests/scaling_tests/forces/forces_simple_xopt/readme.md b/libensemble/tests/scaling_tests/forces/forces_simple_xopt/readme.md new file mode 100644 index 0000000000..929cf3396b --- /dev/null +++ b/libensemble/tests/scaling_tests/forces/forces_simple_xopt/readme.md @@ -0,0 +1,60 @@ +## Tutorial + +This example is a variation of that in the tutorial **Executor with Electrostatic Forces**. + +https://libensemble.readthedocs.io/en/develop/tutorials/executor_forces_tutorial.html + +This version uses an Xopt random number generator. + +Simulation input `x` is a scalar. + +## QuickStart + +Build forces application and run the ensemble. Go to `forces_app` directory and build `forces.x`: + + cd ../forces_app + ./build_forces.sh + +Then return here and run: + + python run_libe_forces.py -n 4 + +This will run with four workers. One worker will run the persistent generator. +The other four will run the forces simulations. + +## Detailed instructions + +Naive Electrostatics Code Test + +This is a synthetic, highly configurable simulation function. Its primary use +is to test libEnsemble's capability to launch application instances via the `MPIExecutor`. + +### Forces Mini-App + +A system of charged particles is initialized and simulated over a number of time-steps. 
+ +See `forces_app` directory for details. + +### Running with libEnsemble. + +A random sample of seeds is taken and used as input to the simulation function +(forces miniapp). + +In the `forces_app` directory, modify `build_forces.sh` for the target platform +and run to build `forces.x`: + + ./build_forces.sh + +Then to run with local comms (multiprocessing) with one manager and `N` workers: + + python run_libe_forces.py --comms local --nworkers N + +To run with MPI comms using one manager and `N-1` workers: + + mpirun -np N python run_libe_forces.py + +Application parameters can be adjusted in the file `run_libe_forces.py`. + +To remove output before the next run: + + ./cleanup.sh diff --git a/libensemble/tests/scaling_tests/forces/forces_simple_xopt/run_libe_forces.py b/libensemble/tests/scaling_tests/forces/forces_simple_xopt/run_libe_forces.py new file mode 100644 index 0000000000..f930705bc8 --- /dev/null +++ b/libensemble/tests/scaling_tests/forces/forces_simple_xopt/run_libe_forces.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +import os +import sys + +import numpy as np +from forces_simf import run_forces # Classic libEnsemble sim_f. +# from forces_simf import run_forces_dict # gest-api/xopt style simulator. + +from gest_api.vocs import VOCS +from xopt.generators.random import RandomGenerator + +from libensemble import Ensemble +from libensemble.alloc_funcs.start_only_persistent import only_persistent_gens as alloc_f +from libensemble.executors import MPIExecutor +from libensemble.specs import AllocSpecs, ExitCriteria, GenSpecs, LibeSpecs, SimSpecs + +if __name__ == "__main__": + # Initialize MPI Executor + exctr = MPIExecutor() + + # Register simulation executable with executor + sim_app = os.path.join(os.getcwd(), "../forces_app/forces.x") + + if not os.path.isfile(sim_app): + sys.exit("forces.x not found - please build first in ../forces_app dir") + + exctr.register_app(full_path=sim_app, app_name="forces") + + # Parse number of workers, comms type, etc. from arguments + ensemble = Ensemble(parse_args=True, executor=exctr) + + # Persistent gen does not need resources + ensemble.libE_specs = LibeSpecs( + gen_on_manager=True, + sim_dirs_make=True, + ) + + # Define VOCS specification + vocs = VOCS( + variables={"x": [1000, 3000]}, # min and max particles + objectives={"energy": "MINIMIZE"}, + ) + + # Create xopt random sampling generator + gen = RandomGenerator(vocs=vocs) + + ensemble.gen_specs = GenSpecs( + initial_batch_size=ensemble.nworkers, + generator=gen, + vocs=vocs, + ) + + ensemble.sim_specs = SimSpecs( + sim_f=run_forces, + # simulator=run_forces_dict, + vocs=vocs, + ) + + # Starts one persistent generator. Simulated values are returned in batch. + ensemble.alloc_specs = AllocSpecs( + alloc_f=alloc_f, + user={ + "async_return": False, # False causes batch returns + }, + ) + + # Instruct libEnsemble to exit after this many simulations + ensemble.exit_criteria = ExitCriteria(sim_max=8) + + # Run ensemble + ensemble.run() + + if ensemble.is_manager: + # Note, this will change if changing sim_max, nworkers, lb, ub, etc. 
+ print(f'Final energy checksum: {np.sum(ensemble.H["energy"])}') diff --git a/libensemble/tests/unit_tests/RENAME_test_persistent_aposmm.py b/libensemble/tests/unit_tests/RENAME_test_persistent_aposmm.py deleted file mode 100644 index 19586a8a2d..0000000000 --- a/libensemble/tests/unit_tests/RENAME_test_persistent_aposmm.py +++ /dev/null @@ -1,173 +0,0 @@ -import multiprocessing -import platform - -import pytest - -import libensemble.gen_funcs - -libensemble.gen_funcs.rc.aposmm_optimizers = "scipy" - -if platform.system() in ["Linux", "Darwin"]: - multiprocessing.set_start_method("fork", force=True) - -import numpy as np - -import libensemble.tests.unit_tests.setup as setup -from libensemble.sim_funcs.six_hump_camel import six_hump_camel_func, six_hump_camel_grad - -libE_info = {"comm": {}} - - -@pytest.mark.extra -def test_persis_aposmm_localopt_test(): - from libensemble.gen_funcs.persistent_aposmm import aposmm - - _, _, gen_specs_0, _, _ = setup.hist_setup1() - - H = np.zeros(4, dtype=[("f", float), ("sim_id", bool), ("dist_to_unit_bounds", float), ("sim_ended", bool)]) - H["sim_ended"] = True - H["sim_id"] = range(len(H)) - gen_specs_0["user"]["localopt_method"] = "BADNAME" - gen_specs_0["user"]["ub"] = np.ones(2) - gen_specs_0["user"]["lb"] = np.zeros(2) - - try: - aposmm(H, {}, gen_specs_0, libE_info) - except NotImplementedError: - assert 1, "Failed because method is unknown." - else: - assert 0 - - -@pytest.mark.extra -def test_update_history_optimal(): - from libensemble.gen_funcs.persistent_aposmm import update_history_optimal - - hist, _, _, _, _ = setup.hist_setup1(n=2) - - H = hist.H - - H["sim_ended"] = True - H["sim_id"] = range(len(H)) - H["f"][0] = -1e-8 - H["x_on_cube"][-1] = 1e-10 - - # Perturb x_opt point to test the case where the reported minimum isn't - # exactly in H. Also, a point in the neighborhood of x_opt has a better - # function value. 
- opt_ind = update_history_optimal(H["x_on_cube"][-1] + 1e-12, 1, H, np.arange(len(H))) - - assert opt_ind == 9, "Wrong point declared minimum" - - -def combined_func(x): - return six_hump_camel_func(x), six_hump_camel_grad(x) - - -@pytest.mark.extra -def test_standalone_persistent_aposmm(): - - import libensemble.gen_funcs - from libensemble.message_numbers import FINISHED_PERSISTENT_GEN_TAG - from libensemble.sim_funcs.six_hump_camel import six_hump_camel_func, six_hump_camel_grad - from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima - - libensemble.gen_funcs.rc.aposmm_optimizers = "scipy" - from libensemble.gen_funcs.persistent_aposmm import aposmm - - persis_info = {"rand_stream": np.random.default_rng(1), "nworkers": 4} - - n = 2 - eval_max = 2000 - - gen_out = [("x", float, n), ("x_on_cube", float, n), ("sim_id", int), ("local_min", bool), ("local_pt", bool)] - - gen_specs = { - "in": ["x", "f", "grad", "local_pt", "sim_id", "sim_ended", "x_on_cube", "local_min"], - "out": gen_out, - "user": { - "initial_sample_size": 100, - # 'localopt_method': 'LD_MMA', # Needs gradients - "sample_points": np.round(minima, 1), - "localopt_method": "scipy_Nelder-Mead", - "standalone": { - "eval_max": eval_max, - "obj_func": six_hump_camel_func, - "grad_func": six_hump_camel_grad, - }, - "opt_return_codes": [0], - "nu": 1e-8, - "mu": 1e-8, - "dist_to_bound_multiple": 0.01, - "max_active_runs": 6, - "lb": np.array([-3, -2]), - "ub": np.array([3, 2]), - }, - } - H = [] - H, persis_info, exit_code = aposmm(H, persis_info, gen_specs, libE_info) - assert exit_code == FINISHED_PERSISTENT_GEN_TAG, "Standalone persistent_aposmm didn't exit correctly" - assert np.sum(H["sim_ended"]) >= eval_max, "Standalone persistent_aposmm, didn't evaluate enough points" - assert persis_info.get("run_order"), "Standalone persistent_aposmm didn't do any localopt runs" - - tol = 1e-3 - min_found = 0 - for m in minima: - # The minima are known on this test problem. 
- # We use their values to test APOSMM has identified all minima - print(np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)), flush=True) - if np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)) < tol: - min_found += 1 - assert min_found >= 6, f"Found {min_found} minima" - - -@pytest.mark.extra -def test_standalone_persistent_aposmm_combined_func(): - - import libensemble.gen_funcs - from libensemble.message_numbers import FINISHED_PERSISTENT_GEN_TAG - from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima - - libensemble.gen_funcs.rc.aposmm_optimizers = "nlopt" - from libensemble.gen_funcs.persistent_aposmm import aposmm - - persis_info = {"rand_stream": np.random.default_rng(1), "nworkers": 4} - - n = 2 - eval_max = 100 - - gen_out = [("x", float, n), ("x_on_cube", float, n), ("sim_id", int), ("local_min", bool), ("local_pt", bool)] - - gen_specs = { - "in": ["x", "f", "grad", "local_pt", "sim_id", "sim_ended", "x_on_cube", "local_min"], - "out": gen_out, - "user": { - "initial_sample_size": 100, - # 'localopt_method': 'LD_MMA', # Needs gradients - "sample_points": np.round(minima, 1), - "localopt_method": "scipy_Nelder-Mead", - "standalone": {"eval_max": eval_max, "obj_and_grad_func": combined_func}, - "opt_return_codes": [0], - "nu": 1e-8, - "mu": 1e-8, - "dist_to_bound_multiple": 0.01, - "max_active_runs": 6, - "lb": np.array([-3, -2]), - "ub": np.array([3, 2]), - }, - } - - H = [] - persis_info = {"rand_stream": np.random.default_rng(1), "nworkers": 3} - H, persis_info, exit_code = aposmm(H, persis_info, gen_specs, libE_info) - - assert exit_code == FINISHED_PERSISTENT_GEN_TAG, "Standalone persistent_aposmm didn't exit correctly" - assert np.sum(H["sim_ended"]) >= eval_max, "Standalone persistent_aposmm, didn't evaluate enough points" - assert persis_info.get("run_order"), "Standalone persistent_aposmm didn't do any localopt runs" - - -if __name__ == "__main__": - test_persis_aposmm_localopt_test() - test_update_history_optimal() - test_standalone_persistent_aposmm() - test_standalone_persistent_aposmm_combined_func() diff --git a/libensemble/tests/unit_tests/test_asktell.py b/libensemble/tests/unit_tests/test_asktell.py new file mode 100644 index 0000000000..45c286ab10 --- /dev/null +++ b/libensemble/tests/unit_tests/test_asktell.py @@ -0,0 +1,156 @@ +import numpy as np +from libensemble.utils.misc import unmap_numpy_array + + +def _check_conversion(H, npp, mapping={}): + + for field in H.dtype.names: + print(f"Comparing {field}: {H[field]} {npp[field]}") + + if isinstance(H[field], np.ndarray): + assert np.array_equal(H[field], npp[field]), f"Mismatch found in field {field}" + + elif isinstance(H[field], str) and isinstance(npp[field], str): + assert H[field] == npp[field], f"Mismatch found in field {field}" + + elif np.isscalar(H[field]) and np.isscalar(npp[field]): + assert np.isclose(H[field], npp[field]), f"Mismatch found in field {field}" + + else: + raise TypeError(f"Unhandled or mismatched types in field {field}: {type(H[field])} vs {type(npp[field])}") + + +# def test_awkward_list_dict(): +# from libensemble.utils.misc import list_dicts_to_np + +# # test list_dicts_to_np on a weirdly formatted dictionary +# # Unfortunately, we're not really checking against some original +# # libE-styled source of truth, like H. 
+ +# weird_list_dict = [ +# { +# "x0": "abcd", +# "x1": "efgh", +# "y": 56, +# "z0": 1, +# "z1": 2, +# "z2": 3, +# "z3": 4, +# "z4": 5, +# "z5": 6, +# "z6": 7, +# "z7": 8, +# "z8": 9, +# "z9": 10, +# "z10": 11, +# "a0": "B", +# } +# ] + +# out_np = list_dicts_to_np(weird_list_dict) + +# assert all([i in ("x", "y", "z", "a0") for i in out_np.dtype.names]) + +# weird_list_dict = [ +# { +# "sim_id": 77, +# "core": 89, +# "edge": 10.1, +# "beam": 76.5, +# "energy": 12.34, +# "local_pt": True, +# "local_min": False, +# }, +# { +# "sim_id": 10, +# "core": 32.8, +# "edge": 16.2, +# "beam": 33.5, +# "energy": 99.34, +# "local_pt": False, +# "local_min": False, +# }, +# ] + +# # target dtype: [("sim_id", int), ("x, float, (3,)), ("f", float), ("local_pt", bool), ("local_min", bool)] + +# mapping = {"x": ["core", "edge", "beam"], "f": ["energy"]} +# out_np = list_dicts_to_np(weird_list_dict, mapping=mapping) + +# assert all([i in ("sim_id", "x", "f", "local_pt", "local_min") for i in out_np.dtype.names]) + + +def test_awkward_H(): + from libensemble.utils.misc import list_dicts_to_np, np_to_list_dicts + + dtype = [("a", "i4"), ("x", "f4", (3,)), ("y", "f4", (1,)), ("z", "f4", (12,)), ("greeting", "U10"), ("co2", "f8")] + H = np.zeros(2, dtype=dtype) + H[0] = (1, [1.1, 2.2, 3.3], [10.1], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], "hello", "1.23") + H[1] = (2, [4.4, 5.5, 6.6], [11.1], [51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62], "goodbye", "2.23") + + list_dicts = np_to_list_dicts(H) + npp = list_dicts_to_np(list_dicts, dtype=dtype) + _check_conversion(H, npp) + + +def test_unmap_numpy_array_basic(): + """Test basic unmapping of x and x_on_cube arrays""" + + dtype = [("sim_id", int), ("x", float, (3,)), ("x_on_cube", float, (3,)), ("f", float), ("grad", float, (3,))] + H = np.zeros(2, dtype=dtype) + H[0] = (0, [1.1, 2.2, 3.3], [0.1, 0.2, 0.3], 10.5, [0.1, 0.2, 0.3]) + H[1] = (1, [4.4, 5.5, 6.6], [0.4, 0.5, 0.6], 20.7, [0.4, 0.5, 0.6]) + + mapping = {"x": ["x0", "x1", "x2"], "x_on_cube": ["x0_cube", "x1_cube", "x2_cube"]} + H_unmapped = unmap_numpy_array(H, mapping) + + expected_fields = ["sim_id", "x0", "x1", "x2", "x0_cube", "x1_cube", "x2_cube", "f"] + assert all(field in H_unmapped.dtype.names for field in expected_fields) + + assert H_unmapped["x0"][0] == 1.1 + assert H_unmapped["x1"][0] == 2.2 + assert H_unmapped["x2"][0] == 3.3 + assert H_unmapped["x0_cube"][0] == 0.1 + assert H_unmapped["x1_cube"][0] == 0.2 + assert H_unmapped["x2_cube"][0] == 0.3 + # Test that non-mapped array fields are passed through unchanged + assert "grad" in H_unmapped.dtype.names + assert np.array_equal(H_unmapped["grad"], H["grad"]) + + +def test_unmap_numpy_array_single_dimension(): + """Test unmapping with single dimension""" + + dtype = [("sim_id", int), ("x", float, (1,)), ("f", float)] + H = np.zeros(1, dtype=dtype) + H[0] = (0, [5.5], 15.0) + + mapping = {"x": ["x0"]} + H_unmapped = unmap_numpy_array(H, mapping) + + assert "x0" in H_unmapped.dtype.names + assert H_unmapped["x0"][0] == 5.5 + + +def test_unmap_numpy_array_edge_cases(): + """Test edge cases for unmap_numpy_array""" + + dtype = [("sim_id", int), ("x", float, (2,)), ("f", float)] + H = np.zeros(1, dtype=dtype) + H[0] = (0, [1.0, 2.0], 10.0) + + # No mapping + H_no_mapping = unmap_numpy_array(H, {}) + assert H_no_mapping is H + + # None array + H_none = unmap_numpy_array(None, {"x": ["x0", "x1"]}) + assert H_none is None + + +if __name__ == "__main__": + # test_awkward_list_dict() + test_awkward_H() + test_unmap_numpy_array_basic() + 
test_unmap_numpy_array_single_dimension() + test_unmap_numpy_array_edge_cases() diff --git a/libensemble/tests/unit_tests/test_executor.py b/libensemble/tests/unit_tests/test_executor.py index df5c8cc32d..8eefc97ed2 100644 --- a/libensemble/tests/unit_tests/test_executor.py +++ b/libensemble/tests/unit_tests/test_executor.py @@ -162,7 +162,7 @@ def is_ompi(): # ----------------------------------------------------------------------------- # The following would typically be in the user sim_func. -def polling_loop(exctr, task, timeout_sec=2, delay=0.05): +def polling_loop(exctr, task, timeout_sec=2, delay=0.1): """Iterate over a loop, polling for an exit condition""" start = time.time() @@ -194,7 +194,7 @@ def polling_loop(exctr, task, timeout_sec=2, delay=0.05): return task -def polling_loop_multitask(exctr, task_list, timeout_sec=4.0, delay=0.05): +def polling_loop_multitask(exctr, task_list, timeout_sec=4.0, delay=0.1): """Iterate over a loop, polling for exit conditions on multiple tasks""" start = time.time() @@ -421,7 +421,7 @@ def test_procs_and_machinefile_logic(): f.write(socket.gethostname() + "\n") task = exctr.submit(calc_type="sim", machinefile=machinefilename, app_args=args_for_sim) - task = polling_loop(exctr, task, timeout_sec=4, delay=0.05) + task = polling_loop(exctr, task, timeout_sec=4, delay=0.1) assert task.finished, "task.finished should be True. Returned " + str(task.finished) assert task.state == "FINISHED", "task.state should be FINISHED. Returned " + str(task.state) @@ -437,7 +437,7 @@ def test_procs_and_machinefile_logic(): ) else: task = exctr.submit(calc_type="sim", num_procs=6, num_nodes=2, procs_per_node=3, app_args=args_for_sim) - task = polling_loop(exctr, task, timeout_sec=4, delay=0.05) + task = polling_loop(exctr, task, timeout_sec=4, delay=0.1) time.sleep(0.25) assert task.finished, "task.finished should be True. Returned " + str(task.finished) assert task.state == "FINISHED", "task.state should be FINISHED. Returned " + str(task.state) @@ -462,7 +462,7 @@ def test_procs_and_machinefile_logic(): else: task = exctr.submit(calc_type="sim", num_nodes=2, procs_per_node=3, app_args=args_for_sim) assert 1 - task = polling_loop(exctr, task, timeout_sec=4, delay=0.05) + task = polling_loop(exctr, task, timeout_sec=4, delay=0.1) time.sleep(0.25) assert task.finished, "task.finished should be True. Returned " + str(task.finished) assert task.state == "FINISHED", "task.state should be FINISHED. Returned " + str(task.state) @@ -478,14 +478,14 @@ def test_procs_and_machinefile_logic(): # Testing no num_nodes (should not fail). task = exctr.submit(calc_type="sim", num_procs=2, procs_per_node=2, app_args=args_for_sim) assert 1 - task = polling_loop(exctr, task, timeout_sec=4, delay=0.05) + task = polling_loop(exctr, task, timeout_sec=4, delay=0.1) assert task.finished, "task.finished should be True. Returned " + str(task.finished) assert task.state == "FINISHED", "task.state should be FINISHED. Returned " + str(task.state) # Testing no procs_per_node (shouldn't fail) task = exctr.submit(calc_type="sim", num_nodes=1, num_procs=2, app_args=args_for_sim) assert 1 - task = polling_loop(exctr, task, timeout_sec=4, delay=0.05) + task = polling_loop(exctr, task, timeout_sec=4, delay=0.1) assert task.finished, "task.finished should be True. Returned " + str(task.finished) assert task.state == "FINISHED", "task.state should be FINISHED. 
Returned " + str(task.state) diff --git a/libensemble/tests/unit_tests/test_models.py b/libensemble/tests/unit_tests/test_models.py index 8477ef6f62..e0cec7b6a2 100644 --- a/libensemble/tests/unit_tests/test_models.py +++ b/libensemble/tests/unit_tests/test_models.py @@ -2,6 +2,9 @@ from pydantic import ValidationError import libensemble.tests.unit_tests.setup as setup +from gest_api.vocs import VOCS +from libensemble.gen_funcs.sampling import latin_hypercube_sample +from libensemble.sim_funcs.simple_sim import norm_eval from libensemble.specs import ExitCriteria, GenSpecs, LibeSpecs, SimSpecs, _EnsembleSpecs from libensemble.utils.misc import specs_dump @@ -116,9 +119,66 @@ def test_ensemble_specs(): _EnsembleSpecs(H0=H0, libE_specs=ls, sim_specs=ss, gen_specs=gs, exit_criteria=ec) +def test_vocs_to_sim_specs(): + """Test that SimSpecs correctly derives inputs and outputs from VOCS""" + + vocs = VOCS( + variables={"x1": [0, 1], "x2": [0, 10]}, + constants={"c1": 1.0}, + objectives={"y1": "MINIMIZE"}, + observables={"o1": float, "o2": int, "o3": (float, (3,))}, + constraints={"con1": ["GREATER_THAN", 0]}, + ) + + ss = SimSpecs(sim_f=norm_eval, vocs=vocs) + + assert ss.inputs == ["x1", "x2", "c1"] + assert len(ss.outputs) == 5 + output_dict = {} + for item in ss.outputs: + if len(item) == 2: + name, dtype = item + output_dict[name] = dtype + else: + name, dtype, shape = item + output_dict[name] = (dtype, shape) + assert output_dict["o1"] == float and output_dict["o2"] == int and output_dict["o3"] == (float, (3,)) + + # Explicit values take precedence + ss2 = SimSpecs(sim_f=norm_eval, vocs=vocs, inputs=["custom"], outputs=[("custom_out", int)]) + + assert ss2.inputs == ["custom"] and ss2.outputs == [("custom_out", int)] + + +def test_vocs_to_gen_specs(): + """Test that GenSpecs correctly derives persis_in and outputs from VOCS""" + + vocs = VOCS( + variables={"x1": [0, 1], "x2": [0, 10]}, + constants={"c1": 1.0}, + objectives={"y1": "MINIMIZE"}, + observables=["obs1"], + constraints={"con1": ["GREATER_THAN", 0]}, + ) + + gs = GenSpecs(gen_f=latin_hypercube_sample, vocs=vocs) + + assert gs.persis_in == ["x1", "x2", "c1", "y1", "obs1", "con1"] + assert len(gs.outputs) == 3 + # All default to float if dtype not specified + for name, dtype in gs.outputs: + assert dtype == float + + # Explicit values take precedence + gs2 = GenSpecs(gen_f=latin_hypercube_sample, vocs=vocs, persis_in=["custom"], out=[("custom_out", int)]) + assert gs2.persis_in == ["custom"] and gs2.outputs == [("custom_out", int)] + + if __name__ == "__main__": test_sim_gen_alloc_exit_specs() test_sim_gen_alloc_exit_specs_invalid() test_libe_specs() test_libe_specs_invalid() test_ensemble_specs() + test_vocs_to_sim_specs() + test_vocs_to_gen_specs() diff --git a/libensemble/tests/unit_tests/test_persistent_aposmm.py b/libensemble/tests/unit_tests/test_persistent_aposmm.py new file mode 100644 index 0000000000..287f1be9cb --- /dev/null +++ b/libensemble/tests/unit_tests/test_persistent_aposmm.py @@ -0,0 +1,635 @@ +import multiprocessing +import platform + +import pytest + +import libensemble.gen_funcs + +libensemble.gen_funcs.rc.aposmm_optimizers = "scipy" + +if platform.system() in ["Linux", "Darwin"]: + multiprocessing.set_start_method("fork", force=True) + +import numpy as np + +import libensemble.tests.unit_tests.setup as setup +from libensemble.sim_funcs.six_hump_camel import six_hump_camel_func, six_hump_camel_grad + +libE_info = {"comm": {}} + + +@pytest.mark.extra +def test_persis_aposmm_localopt_test(): + from 
libensemble.gen_funcs.persistent_aposmm import aposmm + + _, _, gen_specs_0, _, _ = setup.hist_setup1() + + H = np.zeros(4, dtype=[("f", float), ("sim_id", bool), ("dist_to_unit_bounds", float), ("sim_ended", bool)]) + H["sim_ended"] = True + H["sim_id"] = range(len(H)) + gen_specs_0["user"]["localopt_method"] = "BADNAME" + gen_specs_0["user"]["ub"] = np.ones(2) + gen_specs_0["user"]["lb"] = np.zeros(2) + + try: + aposmm(H, {}, gen_specs_0, libE_info) + except NotImplementedError: + assert 1, "Failed because method is unknown." + else: + assert 0 + + +@pytest.mark.extra +def test_update_history_optimal(): + from libensemble.gen_funcs.persistent_aposmm import update_history_optimal + + hist, _, _, _, _ = setup.hist_setup1(n=2) + + H = hist.H + + H["sim_ended"] = True + H["sim_id"] = range(len(H)) + H["f"][0] = -1e-8 + H["x_on_cube"][-1] = 1e-10 + + # Perturb x_opt point to test the case where the reported minimum isn't + # exactly in H. Also, a point in the neighborhood of x_opt has a better + # function value. + opt_ind = update_history_optimal(H["x_on_cube"][-1] + 1e-12, 1, H, np.arange(len(H))) + + assert opt_ind == 9, "Wrong point declared minimum" + + +def combined_func(x): + return six_hump_camel_func(x), six_hump_camel_grad(x) + + +@pytest.mark.extra +def test_standalone_persistent_aposmm(): + + import libensemble.gen_funcs + from libensemble.message_numbers import FINISHED_PERSISTENT_GEN_TAG + from libensemble.sim_funcs.six_hump_camel import six_hump_camel_func, six_hump_camel_grad + from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima + + libensemble.gen_funcs.rc.aposmm_optimizers = "scipy" + from libensemble.gen_funcs.persistent_aposmm import aposmm + + persis_info = {"rand_stream": np.random.default_rng(1), "nworkers": 4} + + n = 2 + eval_max = 2000 + + gen_out = [("x", float, n), ("x_on_cube", float, n), ("sim_id", int), ("local_min", bool), ("local_pt", bool)] + + gen_specs = { + "in": ["x", "f", "grad", "local_pt", "sim_id", "sim_ended", "x_on_cube", "local_min"], + "out": gen_out, + "user": { + "initial_sample_size": 100, + # 'localopt_method': 'LD_MMA', # Needs gradients + "sample_points": np.round(minima, 1), + "localopt_method": "scipy_Nelder-Mead", + "standalone": { + "eval_max": eval_max, + "obj_func": six_hump_camel_func, + "grad_func": six_hump_camel_grad, + }, + "opt_return_codes": [0], + "nu": 1e-8, + "mu": 1e-8, + "dist_to_bound_multiple": 0.01, + "max_active_runs": 6, + "lb": np.array([-3, -2]), + "ub": np.array([3, 2]), + }, + } + H = [] + H, persis_info, exit_code = aposmm(H, persis_info, gen_specs, libE_info) + assert exit_code == FINISHED_PERSISTENT_GEN_TAG, "Standalone persistent_aposmm didn't exit correctly" + assert np.sum(H["sim_ended"]) >= eval_max, "Standalone persistent_aposmm, didn't evaluate enough points" + assert persis_info.get("run_order"), "Standalone persistent_aposmm didn't do any localopt runs" + + tol = 1e-3 + min_found = 0 + for m in minima: + # The minima are known on this test problem. 
+ # We use their values to test APOSMM has identified all minima + print(np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)), flush=True) + if np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)) < tol: + min_found += 1 + assert min_found >= 6, f"Found {min_found} minima" + + +def _evaluate_aposmm_instance(my_APOSMM, minimum_minima=6): + from libensemble.message_numbers import FINISHED_PERSISTENT_GEN_TAG + from libensemble.sim_funcs.six_hump_camel import six_hump_camel_func + from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima + + initial_sample = my_APOSMM.suggest(100) + + total_evals = 0 + eval_max = 2000 + + for point in initial_sample: + point["energy"] = six_hump_camel_func(np.array([point["core"], point["edge"]])) + total_evals += 1 + + my_APOSMM.ingest(initial_sample) + + potential_minima = [] + + while total_evals < eval_max: + + sample, detected_minima = my_APOSMM.suggest(6), my_APOSMM.suggest_updates() + if len(detected_minima): + for m in detected_minima: + potential_minima.append(m) + for point in sample: + point["energy"] = six_hump_camel_func(np.array([point["core"], point["edge"]])) + total_evals += 1 + my_APOSMM.ingest(sample) + my_APOSMM.finalize() + H, persis_info, exit_code = my_APOSMM.export() + + assert exit_code == FINISHED_PERSISTENT_GEN_TAG, "Standalone persistent_aposmm didn't exit correctly" + assert persis_info.get("run_order"), "Standalone persistent_aposmm didn't do any localopt runs" + + assert len(potential_minima) >= 6, f"Found {len(potential_minima)} minima" + + tol = 1e-3 + min_found = 0 + for m in minima: + # The minima are known on this test problem. + # We use their values to test APOSMM has identified all minima + print(np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)), flush=True) + if np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)) < tol: + min_found += 1 + assert min_found >= minimum_minima, f"Found {min_found} minima" + + +@pytest.mark.extra +def test_standalone_persistent_aposmm_combined_func(): + + import libensemble.gen_funcs + from libensemble.message_numbers import FINISHED_PERSISTENT_GEN_TAG + from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima + + libensemble.gen_funcs.rc.aposmm_optimizers = "scipy" + from libensemble.gen_funcs.persistent_aposmm import aposmm + + persis_info = {"rand_stream": np.random.default_rng(1), "nworkers": 4} + + n = 2 + eval_max = 100 + + gen_out = [("x", float, n), ("x_on_cube", float, n), ("sim_id", int), ("local_min", bool), ("local_pt", bool)] + + gen_specs = { + "in": ["x", "f", "grad", "local_pt", "sim_id", "sim_ended", "x_on_cube", "local_min"], + "out": gen_out, + "user": { + "initial_sample_size": 100, + # 'localopt_method': 'LD_MMA', # Needs gradients + "sample_points": np.round(minima, 1), + "localopt_method": "scipy_Nelder-Mead", + "standalone": {"eval_max": eval_max, "obj_and_grad_func": combined_func}, + "opt_return_codes": [0], + "nu": 1e-8, + "mu": 1e-8, + "dist_to_bound_multiple": 0.01, + "max_active_runs": 6, + "lb": np.array([-3, -2]), + "ub": np.array([3, 2]), + }, + } + + H = [] + persis_info = {"rand_stream": np.random.default_rng(1), "nworkers": 3} + H, persis_info, exit_code = aposmm(H, persis_info, gen_specs, libE_info) + + assert exit_code == FINISHED_PERSISTENT_GEN_TAG, "Standalone persistent_aposmm didn't exit correctly" + assert np.sum(H["sim_ended"]) >= eval_max, "Standalone persistent_aposmm, didn't evaluate enough points" + assert persis_info.get("run_order"), "Standalone persistent_aposmm didn't do any 
localopt runs" + + +@pytest.mark.extra +def test_asktell_with_persistent_aposmm(): + + from gest_api.vocs import VOCS + + import libensemble.gen_funcs + from libensemble.gen_classes import APOSMM + from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima + + libensemble.gen_funcs.rc.aposmm_optimizers = "scipy" + + variables = {"core": [-3, 3], "edge": [-2, 2], "core_on_cube": [0, 1], "edge_on_cube": [0, 1]} + objectives = {"energy": "MINIMIZE"} + + variables_mapping = { + "x": ["core", "edge"], + "x_on_cube": ["core_on_cube", "edge_on_cube"], + "f": ["energy"], + } + + vocs = VOCS(variables=variables, objectives=objectives) + + my_APOSMM = APOSMM( + vocs, + max_active_runs=6, + initial_sample_size=100, + variables_mapping=variables_mapping, + sample_points=np.round(minima, 1), + localopt_method="scipy_Nelder-Mead", + opt_return_codes=[0], + nu=1e-8, + mu=1e-8, + dist_to_bound_multiple=0.01, + ) + + _evaluate_aposmm_instance(my_APOSMM) + + +@pytest.mark.extra +def test_asktell_errors(): + + from gest_api.vocs import VOCS + + import libensemble.gen_funcs + from libensemble.gen_classes import APOSMM + from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima + + libensemble.gen_funcs.rc.aposmm_optimizers = "scipy" + + variables = {"core": [-3, 3], "edge": [-2, 2], "core_on_cube": [0, 1], "edge_on_cube": [0, 1]} + objectives = {"energy": "MINIMIZE"} + + bad_mapping = { + "x": ["core", "edge"], + "f": ["energy"], + } + + vocs = VOCS( + variables=variables, + objectives=objectives, + constraints={"c1": ["LESS_THAN", 0]}, + constants={"alpha": 0.55}, + observables={"o1"}, + ) + + with pytest.raises(ValueError): + APOSMM( + vocs, + max_active_runs=6, + variables_mapping=bad_mapping, + initial_sample_size=100, + sample_points=np.round(minima, 1), + localopt_method="scipy_Nelder-Mead", + opt_return_codes=[0], + nu=1e-8, + mu=1e-8, + dist_to_bound_multiple=0.01, + ) + pytest.fail("Should have raised error for bad mapping") + + bad_mapping = { + "x": ["core", "edge"], + "x_on_cube": ["core_on_cube", "edge_on_cube", "blah"], + "f": ["energy"], + } + + with pytest.raises(ValueError): + APOSMM( + vocs, + max_active_runs=6, + variables_mapping=bad_mapping, + initial_sample_size=100, + sample_points=np.round(minima, 1), + localopt_method="scipy_Nelder-Mead", + opt_return_codes=[0], + nu=1e-8, + mu=1e-8, + dist_to_bound_multiple=0.01, + ) + pytest.fail("Should have raised error for bad mapping") + + variables_mapping = { + "x": ["core", "edge"], + "x_on_cube": ["core_on_cube", "edge_on_cube"], + "f": ["energy"], + } + + vocs = VOCS(variables=variables, objectives=objectives) + + my_APOSMM = APOSMM( + vocs, + max_active_runs=6, + initial_sample_size=6, + variables_mapping=variables_mapping, + localopt_method="scipy_Nelder-Mead", + opt_return_codes=[0], + nu=1e-8, + mu=1e-8, + dist_to_bound_multiple=0.01, + ) + + my_APOSMM.suggest() + with pytest.raises(RuntimeError): + my_APOSMM.suggest() + pytest.fail("Should've failed on consecutive empty suggests") + + my_APOSMM = APOSMM( + vocs, + max_active_runs=6, + initial_sample_size=6, + variables_mapping=variables_mapping, + localopt_method="scipy_Nelder-Mead", + opt_return_codes=[0], + nu=1e-8, + mu=1e-8, + dist_to_bound_multiple=0.5, + ) + + with pytest.raises(RuntimeError): + my_APOSMM.finalize() + pytest.fail("Should've failed on finalize before start") + + my_APOSMM = APOSMM( + vocs, + max_active_runs=6, + initial_sample_size=6, + variables_mapping=variables_mapping, + 
localopt_method="scipy_Nelder-Mead", + opt_return_codes=[0], + nu=1e-8, + mu=1e-8, + dist_to_bound_multiple=0.01, + ) + + my_APOSMM.suggest() + with pytest.raises(RuntimeError): + my_APOSMM.setup() + pytest.fail("Should've failed on consecutive setup") + my_APOSMM.finalize() + + +@pytest.mark.extra +def test_asktell_ingest_first(): + from math import gamma, pi, sqrt + + from gest_api.vocs import VOCS + + import libensemble.gen_funcs + from libensemble.gen_classes import APOSMM + from libensemble.sim_funcs.six_hump_camel import six_hump_camel_func + from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima + + libensemble.gen_funcs.rc.aposmm_optimizers = "nlopt" + + n = 2 + + variables = {"core": [-3, 3], "edge": [-2, 2], "core_on_cube": [0, 1], "edge_on_cube": [0, 1]} + objectives = {"energy": "MINIMIZE"} + + variables_mapping = { + "x": ["core", "edge"], + "x_on_cube": ["core_on_cube", "edge_on_cube"], + "f": ["energy"], + } + + vocs = VOCS(variables=variables, objectives=objectives) + + my_APOSMM = APOSMM( + vocs, + max_active_runs=6, + initial_sample_size=6, + variables_mapping=variables_mapping, + localopt_method="LN_BOBYQA", + opt_return_codes=[0], + rk_const=0.5 * ((gamma(1 + (n / 2)) * 5) ** (1 / n)) / sqrt(pi), + xtol_abs=1e-6, + ftol_abs=1e-6, + dist_to_bound_multiple=0.01, + ) + + # local_H["x_on_cube"][-num_pts:] = (pts - lb) / (ub - lb) + initial_sample = [ + { + "core": minima[i][0], + "edge": minima[i][1], + "core_on_cube": (minima[i][0] - variables["core"][0]) / (variables["core"][1] - variables["core"][0]), + "edge_on_cube": (minima[i][1] - variables["edge"][0]) / (variables["edge"][1] - variables["edge"][0]), + "energy": six_hump_camel_func(np.array([minima[i][0], minima[i][1]])), + } + for i in range(6) + ] + my_APOSMM.ingest(initial_sample) + + total_evals = 0 + eval_max = 2000 + + potential_minima = [] + + while total_evals < eval_max: + + sample, detected_minima = my_APOSMM.suggest(6), my_APOSMM.suggest_updates() + if len(detected_minima): + for m in detected_minima: + potential_minima.append(m) + for point in sample: + point["energy"] = six_hump_camel_func(np.array([point["core"], point["edge"]])) + total_evals += 1 + my_APOSMM.ingest(sample) + my_APOSMM.finalize() + H, persis_info, exit_code = my_APOSMM.export() + + assert persis_info.get("run_order"), "Standalone persistent_aposmm didn't do any localopt runs" + + assert len(potential_minima) >= 6, f"Found {len(potential_minima)} minima" + + tol = 1e-4 + min_found = 0 + for m in minima: + # The minima are known on this test problem. 
+ # We use their values to test APOSMM has identified all minima + print(np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)), flush=True) + if np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)) < tol: + min_found += 1 + assert min_found >= 4, f"Found {min_found} minima" + + +@pytest.mark.extra +def test_asktell_consecutive_during_sample(): + """Test consecutive suggest and ingest during sample""" + + from gest_api.vocs import VOCS + + import libensemble.gen_funcs + from libensemble.gen_classes import APOSMM + from libensemble.tests.regression_tests.support import six_hump_camel_minima as minima + + libensemble.gen_funcs.rc.aposmm_optimizers = "scipy" + + variables = {"core": [-3, 3], "edge": [-2, 2], "core_on_cube": [0, 1], "edge_on_cube": [0, 1]} + objectives = {"energy": "MINIMIZE"} + + variables_mapping = { + "x": ["core", "edge"], + "x_on_cube": ["core_on_cube", "edge_on_cube"], + "f": ["energy"], + } + + vocs = VOCS(variables=variables, objectives=objectives) + + my_APOSMM = APOSMM( + vocs, + max_active_runs=6, + initial_sample_size=6, + variables_mapping=variables_mapping, + localopt_method="scipy_Nelder-Mead", + opt_return_codes=[0], + nu=1e-8, + mu=1e-8, + dist_to_bound_multiple=0.01, + ) + + # Test consecutive suggest + first = my_APOSMM.suggest(1) + first[0]["energy"] = six_hump_camel_func(np.array([first[0]["core"], first[0]["edge"]])) + my_APOSMM.ingest(first) + second = my_APOSMM.suggest(1) + second += my_APOSMM.suggest(4) + for point in second: + point["energy"] = six_hump_camel_func(np.array([point["core"], point["edge"]])) + # Test consecutive ingest + my_APOSMM.ingest(second[:3]) + my_APOSMM.ingest(second[3:]) + + total_evals = 0 + eval_max = 2000 + + potential_minima = [] + + while total_evals < eval_max: + + sample, detected_minima = my_APOSMM.suggest(3), my_APOSMM.suggest_updates() + sample += my_APOSMM.suggest(3) + if len(detected_minima): + for m in detected_minima: + potential_minima.append(m) + for point in sample: + point["energy"] = six_hump_camel_func(np.array([point["core"], point["edge"]])) + total_evals += 1 + my_APOSMM.ingest(sample) + + my_APOSMM.finalize() + H, persis_info, exit_code = my_APOSMM.export() + + assert persis_info.get("run_order"), "Standalone persistent_aposmm didn't do any localopt runs" + + assert len(potential_minima) >= 6, f"Found {len(potential_minima)} minima" + + tol = 1e-3 + min_found = 0 + for m in minima: + # The minima are known on this test problem. 
+ # We use their values to test APOSMM has identified all minima + print(np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)), flush=True) + if np.min(np.sum((H[H["local_min"]]["x"] - m) ** 2, 1)) < tol: + min_found += 1 + assert min_found >= 4, f"Found {min_found} minima" + + +def _run_aposmm_export_test(variables_mapping): + """Helper function to run APOSMM export tests with given variables_mapping""" + from gest_api.vocs import VOCS + + from libensemble.gen_classes import APOSMM + + variables = { + "core": [-3, 3], + "edge": [-2, 2], + "core_on_cube": [0, 1], + "edge_on_cube": [0, 1], + } + objectives = {"energy": "MINIMIZE"} + + vocs = VOCS(variables=variables, objectives=objectives) + aposmm = APOSMM( + vocs, + max_active_runs=6, + initial_sample_size=10, + variables_mapping=variables_mapping, + localopt_method="scipy_Nelder-Mead", + opt_return_codes=[0], + nu=1e-8, + mu=1e-8, + dist_to_bound_multiple=0.01, + ) + # Test basic export before finalize + H, _, _ = aposmm.export() + print(f"Export before finalize: {H}") # Debug + assert H is None # Should be None before finalize + # Test export after suggest/ingest cycle + sample = aposmm.suggest(5) + for point in sample: + point["energy"] = 1.0 # Mock evaluation + aposmm.ingest(sample) + aposmm.finalize() + + # Test export with unmapped fields + H, _, _ = aposmm.export() + if H is not None: + assert "x" in H.dtype.names and H["x"].ndim == 2 + assert "f" in H.dtype.names and H["f"].ndim == 1 + + # Test export with user_fields + H_unmapped, _, _ = aposmm.export(user_fields=True) + print(f"H_unmapped: {H_unmapped}") # Debug + if H_unmapped is not None: + assert "core" in H_unmapped.dtype.names + assert "edge" in H_unmapped.dtype.names + assert "energy" in H_unmapped.dtype.names + # Test export with as_dicts + H_dicts, _, _ = aposmm.export(as_dicts=True) + assert isinstance(H_dicts, list) + assert isinstance(H_dicts[0], dict) + assert "x" in H_dicts[0] # x remains as array + assert "f" in H_dicts[0] + # Test export with both options + H_both, _, _ = aposmm.export(user_fields=True, as_dicts=True) + assert isinstance(H_both, list) + assert "core" in H_both[0] + assert "edge" in H_both[0] + assert "energy" in H_both[0] + + +@pytest.mark.extra +def test_aposmm_export(): + """Test APOSMM export function with different options""" + + # Test with full variables_mapping + full_mapping = { + "x": ["core", "edge"], + "x_on_cube": ["core_on_cube", "edge_on_cube"], + "f": ["energy"], + } + _run_aposmm_export_test(full_mapping) + # Test with just x_on_cube mapping (should auto-map x and f) + minimal_mapping = { + "x_on_cube": ["core_on_cube", "edge_on_cube"], + } + _run_aposmm_export_test(minimal_mapping) + + +if __name__ == "__main__": + test_persis_aposmm_localopt_test() + test_update_history_optimal() + test_standalone_persistent_aposmm() + test_standalone_persistent_aposmm_combined_func() + test_asktell_with_persistent_aposmm() + test_asktell_ingest_first() + test_asktell_consecutive_during_sample() + test_asktell_errors() + test_aposmm_export() diff --git a/libensemble/tests/unit_tests/test_ufunc_runners.py b/libensemble/tests/unit_tests/test_ufunc_runners.py index 09f17b07ec..0b362700fd 100644 --- a/libensemble/tests/unit_tests/test_ufunc_runners.py +++ b/libensemble/tests/unit_tests/test_ufunc_runners.py @@ -30,8 +30,8 @@ def get_ufunc_args(): def test_normal_runners(): calc_in, sim_specs, gen_specs = get_ufunc_args() - simrunner = Runner(sim_specs) - genrunner = Runner(gen_specs) + simrunner = Runner.from_specs(sim_specs) + genrunner = 
Runner.from_specs(gen_specs) assert not hasattr(simrunner, "globus_compute_executor") and not hasattr( genrunner, "globus_compute_executor" ), "Globus Compute use should not be detected without setting endpoint fields" @@ -47,7 +47,7 @@ def tupilize(arg1, arg2): sim_specs["sim_f"] = tupilize persis_info = {"hello": "threads"} - simrunner = Runner(sim_specs) + simrunner = Runner.from_specs(sim_specs) result = simrunner._result(calc_in, persis_info, {}) assert result == (calc_in, persis_info) assert hasattr(simrunner, "thread_handle") @@ -75,7 +75,7 @@ def test_globus_compute_runner_init(): sim_specs["globus_compute_endpoint"] = "1234" with mock.patch("globus_compute_sdk.Executor"): - runner = Runner(sim_specs) + runner = Runner.from_specs(sim_specs) assert hasattr( runner, "globus_compute_executor" @@ -89,7 +89,7 @@ def test_globus_compute_runner_pass(): sim_specs["globus_compute_endpoint"] = "1234" with mock.patch("globus_compute_sdk.Executor"): - runner = Runner(sim_specs) + runner = Runner.from_specs(sim_specs) # Creating Mock Globus ComputeExecutor and Globus Compute future object - no exception globus_compute_mock = mock.Mock() @@ -115,7 +115,7 @@ def test_globus_compute_runner_fail(): gen_specs["globus_compute_endpoint"] = "4321" with mock.patch("globus_compute_sdk.Executor"): - runner = Runner(gen_specs) + runner = Runner.from_specs(gen_specs) # Creating Mock Globus ComputeExecutor and Globus Compute future object - yes exception globus_compute_mock = mock.Mock() diff --git a/libensemble/tools/alloc_support.py b/libensemble/tools/alloc_support.py index bbb5e275e4..a8c9a65c86 100644 --- a/libensemble/tools/alloc_support.py +++ b/libensemble/tools/alloc_support.py @@ -280,6 +280,7 @@ def gen_work(self, wid, H_fields, H_rows, persis_info, **libE_info): H_fields = AllocSupport._check_H_fields(H_fields) libE_info["H_rows"] = AllocSupport._check_H_rows(H_rows) + libE_info["batch_size"] = len(self.avail_worker_ids(gen_workers=False)) work = { "H_fields": H_fields, diff --git a/libensemble/utils/misc.py b/libensemble/utils/misc.py index cfb4f4df20..1c10a9d88e 100644 --- a/libensemble/utils/misc.py +++ b/libensemble/utils/misc.py @@ -2,8 +2,12 @@ Misc internal functions """ -from itertools import groupby +from itertools import chain, groupby from operator import itemgetter +from typing import List + +import numpy as np +import numpy.typing as npt def extract_H_ranges(Work: dict) -> str: @@ -57,3 +61,188 @@ def specs_checker_getattr(obj, key, default=None): def specs_checker_setattr(obj, key, value): obj.__dict__[key] = value + + +def _combine_names(names: list) -> list: + """Return unique field names without auto-combining""" + return list(dict.fromkeys(names)) # preserves order, removes duplicates + + +def _get_new_dtype_fields(first: dict, mapping: dict = {}) -> list: + """build list of fields that will be in the output numpy array""" + new_dtype_names = _combine_names([i for i in first.keys()]) # -> ['x', 'y'] + fields_to_convert = list( # combining all mapping lists + chain.from_iterable(list(mapping.values())) + ) # fields like ["beam_length", "beam_width"] that will become "x" + new_dtype_names = [i for i in new_dtype_names if i not in fields_to_convert] + list( + mapping.keys() + ) # array dtype needs "x". avoid fields from mapping values since we're converting those to "x" + + # We need to accommodate "_id" getting mapped to "sim_id", but if it's not present + # in the input dictionary, then perhaps we're doing an initial sample. 
+ # I wonder if this loop is generalizable to other fields. + if "_id" not in first and "sim_id" in mapping: + new_dtype_names.remove("sim_id") + return new_dtype_names + + +def _get_combinable_multidim_names(first: dict, new_dtype_names: list) -> list: + """Return each field name as a single-element list without auto-grouping""" + return [[name] for name in new_dtype_names] + + +def _decide_dtype(name: str, entry, size: int) -> tuple: + """decide dtype of field, and size if needed""" + if isinstance(entry, str): # use numpy style for string type + output_type = "U" + str(len(entry) + 1) + else: + output_type = type(entry) # use default "python" type + if name == "sim_id": # mapping seems to assume that sim_ids are interpretable as floats unless this...? + output_type = int + if size == 1 or not size: + return (name, output_type) + else: + return (name, output_type, (size,)) # 3-tuple for multi-dimensional + + +def _start_building_dtype( + first: dict, new_dtype_names: list, combinable_names: list, dtype: list, mapping: dict +) -> list: + """parse out necessary components of dtype for output numpy array""" + for i, entry in enumerate(combinable_names): + name = new_dtype_names[i] + size = len(combinable_names[i]) # e.g. 2 for [x0, x1] + if name not in mapping: # mapping keys are what we're converting *to* + dtype.append(_decide_dtype(name, first[entry[0]], size)) + return dtype + + +def _pack_field(input_dict: dict, field_names: list) -> tuple: + """pack dict data into tuple for slotting into numpy array""" + # {"x0": 1, "x1": 2} -> (1, 2) + return tuple(input_dict[name] for name in field_names) if len(field_names) > 1 else input_dict[field_names[0]] + + +def list_dicts_to_np(list_dicts: list, dtype: list = None, mapping: dict = {}) -> npt.NDArray: + if list_dicts is None: + return None + + if not isinstance(list_dicts, list): # presumably already a numpy array, conversion not necessary + return list_dicts + + # first entry is used to determine dtype + first = list_dicts[0] + + # build a presumptive dtype + new_dtype_names = _get_new_dtype_fields(first, mapping) + combinable_names = _get_combinable_multidim_names(first, new_dtype_names) # [['x0', 'x1'], ['z']] + + if ( + dtype is None + ): # rather roundabout. I believe default value gets set upon function instantiation. (default is mutable!) + dtype = [] + + # build dtype of non-mapped fields. appending onto empty dtype + if not len(dtype): + dtype = _start_building_dtype(first, new_dtype_names, combinable_names, dtype, mapping) + + # append dtype of mapped float fields + if len(mapping): + for name in mapping: + size = len(mapping[name]) + dtype.append(_decide_dtype(name, 0.0, size)) # float + + out = np.zeros(len(list_dicts), dtype=dtype) + + # starting packing data from list of dicts into array + for j, input_dict in enumerate(list_dicts): + for output_name, input_names in zip(new_dtype_names, combinable_names): # [('x', ['x0', 'x1']), ...] 
+ if output_name not in mapping: + out[output_name][j] = _pack_field(input_dict, input_names) + else: + out[output_name][j] = _pack_field(input_dict, mapping[output_name]) + return out + + +def _is_multidim(selection: npt.NDArray) -> bool: + return hasattr(selection, "__len__") and len(selection) > 1 and not isinstance(selection, str) + + +def _is_singledim(selection: npt.NDArray) -> bool: + return (hasattr(selection, "__len__") and len(selection) == 1) or selection.shape == () + + +def unmap_numpy_array(array: npt.NDArray, mapping: dict = {}) -> npt.NDArray: + """Convert numpy array with mapped fields back to individual scalar fields. + Parameters + ---------- + array : npt.NDArray + Input array with mapped fields like x = [x0, x1, x2] + mapping : dict + Mapping from field names to variable names + Returns + ------- + npt.NDArray + Array with unmapped fields like x0, x1, x2 as individual scalars + """ + if not mapping or array is None: + return array + # Create new dtype with unmapped fields + new_fields = [] + for field in array.dtype.names: + if field in mapping: + for var_name in mapping[field]: + new_fields.append((var_name, array[field].dtype.type)) + else: + # Preserve the original field structure including per-row shape + field_dtype = array.dtype[field] + new_fields.append((field, field_dtype)) + unmapped_array = np.zeros(len(array), dtype=new_fields) + for field in array.dtype.names: + if field in mapping: + # Unmap array fields + if len(array[field].shape) == 1: + # Scalar field mapped to single variable + unmapped_array[mapping[field][0]] = array[field] + else: + # Multi-dimensional field + for i, var_name in enumerate(mapping[field]): + unmapped_array[var_name] = array[field][:, i] + else: + # Copy non-mapped fields + unmapped_array[field] = array[field] + return unmapped_array + + +def np_to_list_dicts(array: npt.NDArray, mapping: dict = {}) -> List[dict]: + if array is None: + return None + out = [] + + for row in array: + new_dict = {} + + for field in row.dtype.names: + if field not in list(mapping.keys()): + # Unmapped fields: copy directly (no auto-unpacking) + new_dict[field] = row[field] + else: # keys from mapping and array unpacked into corresponding fields in dicts + field_shape = array.dtype[field].shape[0] if len(array.dtype[field].shape) > 0 else 1 + assert field_shape == len(mapping[field]), ( + "dimension mismatch between mapping and array with field " + field + ) + + for i, name in enumerate(mapping[field]): + if _is_multidim(row[field]): + new_dict[name] = row[field][i] + elif _is_singledim(row[field]): + new_dict[name] = row[field] + + out.append(new_dict) + + # exiting gen: convert sim_id to _id + for entry in out: + if "sim_id" in entry: + entry["_id"] = entry.pop("sim_id") + + return out diff --git a/libensemble/utils/pydantic_bindings.py b/libensemble/utils/pydantic_bindings.py index 6c297bb95d..6ae28efe8b 100644 --- a/libensemble/utils/pydantic_bindings.py +++ b/libensemble/utils/pydantic_bindings.py @@ -15,8 +15,8 @@ check_inputs_exist, check_logical_cores, check_mpi_runner_type, - check_output_fields, check_provided_ufuncs, + check_set_gen_specs_from_variables, check_valid_comms_type, check_valid_in, check_valid_out, @@ -77,6 +77,7 @@ __validators__={ "check_valid_out": check_valid_out, "check_valid_in": check_valid_in, + "check_set_gen_specs_from_variables": check_set_gen_specs_from_variables, "genf_set_in_out_from_attrs": genf_set_in_out_from_attrs, }, ) @@ -102,7 +103,6 @@ __base__=specs._EnsembleSpecs, __validators__={ "check_exit_criteria": 
check_exit_criteria, - "check_output_fields": check_output_fields, "check_H0": check_H0, "check_provided_ufuncs": check_provided_ufuncs, }, diff --git a/libensemble/utils/runners.py b/libensemble/utils/runners.py index 9554b11769..b0c78a7bc6 100644 --- a/libensemble/utils/runners.py +++ b/libensemble/utils/runners.py @@ -1,22 +1,35 @@ import inspect import logging import logging.handlers +import time import numpy.typing as npt from libensemble.comms.comms import QCommThread +from libensemble.generators import LibensembleGenerator, PersistentGenInterfacer +from libensemble.message_numbers import EVAL_GEN_TAG, FINISHED_PERSISTENT_GEN_TAG, PERSIS_STOP, STOP_TAG +from libensemble.tools.persistent_support import PersistentSupport +from libensemble.utils.misc import list_dicts_to_np, np_to_list_dicts logger = logging.getLogger(__name__) class Runner: - def __new__(cls, specs): + @classmethod + def from_specs(cls, specs): if len(specs.get("globus_compute_endpoint", "")) > 0: - return super(Runner, GlobusComputeRunner).__new__(GlobusComputeRunner) - if specs.get("threaded"): # TODO: undecided interface - return super(Runner, ThreadRunner).__new__(ThreadRunner) + return GlobusComputeRunner(specs) + if specs.get("threaded"): + return ThreadRunner(specs) + if (generator := specs.get("generator")) is not None: + if isinstance(generator, PersistentGenInterfacer): + return LibensembleGenThreadRunner(specs) + if isinstance(generator, LibensembleGenerator): + return LibensembleGenRunner(specs) + else: + return StandardGenRunner(specs) else: - return super().__new__(Runner) + return Runner(specs) def __init__(self, specs): self.specs = specs @@ -85,3 +98,124 @@ def _result(self, calc_in: npt.NDArray, persis_info: dict, libE_info: dict) -> ( def shutdown(self) -> None: if self.thread_handle is not None: self.thread_handle.terminate() + + +class StandardGenRunner(Runner): + """Interact with suggest/ingest generator. 
Base class initialized for third-party generators.""" + + def __init__(self, specs): + super().__init__(specs) + self.gen = specs.get("generator") + + def _get_points_updates(self, batch_size: int) -> (npt.NDArray, npt.NDArray): + # no suggest_updates on external gens + return ( + list_dicts_to_np( + self.gen.suggest(batch_size), + dtype=self.specs.get("out"), + mapping=getattr(self.gen, "variables_mapping", {}), + ), + None, + ) + + def _convert_ingest(self, x: npt.NDArray) -> list: + self.gen.ingest(np_to_list_dicts(x)) + + def _loop_over_gen(self, tag, Work, H_in): + """Interact with suggest/ingest generator that *does not* contain a background thread""" + while tag not in [PERSIS_STOP, STOP_TAG]: + batch_size = self.specs.get("batch_size") or len(H_in) + H_out, _ = self._get_points_updates(batch_size) + tag, Work, H_in = self.ps.send_recv(H_out) + if H_in is not None: + self._convert_ingest(H_in) + return H_in + + def _get_initial_suggest(self, libE_info) -> npt.NDArray: + """Get initial batch from generator based on generator type""" + initial_batch = self.specs.get("initial_batch_size") or self.specs.get("batch_size") or libE_info["batch_size"] + H_out = self.gen.suggest(initial_batch) + return H_out + + def _start_generator_loop(self, tag, Work, H_in): + """Start the generator loop after choosing best way of giving initial results to gen""" + self.gen.ingest(np_to_list_dicts(H_in, mapping=getattr(self.gen, "variables_mapping", {}))) + return self._loop_over_gen(tag, Work, H_in) + + def _persistent_result(self, calc_in, persis_info, libE_info): + """Setup comms with manager, setup gen, loop gen to completion, return gen's results""" + self.ps = PersistentSupport(libE_info, EVAL_GEN_TAG) + # libE gens will hit the following line, but list_dicts_to_np will passthrough if the output is a numpy array + H_out = list_dicts_to_np( + self._get_initial_suggest(libE_info), + dtype=self.specs.get("out"), + mapping=getattr(self.gen, "variables_mapping", {}), + ) + tag, Work, H_in = self.ps.send_recv(H_out) # evaluate the initial sample + final_H_out = self._start_generator_loop(tag, Work, H_in) + self.gen.finalize() + return final_H_out, FINISHED_PERSISTENT_GEN_TAG + + def _result(self, calc_in: npt.NDArray, persis_info: dict, libE_info: dict) -> (npt.NDArray, dict, int): + if libE_info.get("persistent"): + return self._persistent_result(calc_in, persis_info, libE_info) + raise ValueError( + "suggest/ingest generators must run in persistent mode. This may be the default in the future." 
+ ) + + +class LibensembleGenRunner(StandardGenRunner): + def _get_initial_suggest(self, libE_info) -> npt.NDArray: + """Get initial batch from generator based on generator type""" + initial_batch = self.specs.get("initial_batch_size") or self.specs.get("batch_size") or libE_info["batch_size"] + H_out = self.gen.suggest_numpy(initial_batch) + return H_out + + def _get_points_updates(self, batch_size: int) -> (npt.NDArray, list): + numpy_out = self.gen.suggest_numpy(batch_size) + if callable(getattr(self.gen, "suggest_updates", None)): + updates = self.gen.suggest_updates() + else: + updates = None + return numpy_out, updates + + def _convert_ingest(self, x: npt.NDArray) -> list: + self.gen.ingest_numpy(x) + + def _start_generator_loop(self, tag, Work, H_in) -> npt.NDArray: + """Start the generator loop after choosing best way of giving initial results to gen""" + self.gen.ingest_numpy(H_in) + return self._loop_over_gen(tag, Work, H_in) # see parent class + + +class LibensembleGenThreadRunner(StandardGenRunner): + def _get_initial_suggest(self, libE_info) -> npt.NDArray: + """Get initial batch from generator based on generator type""" + return self.gen.suggest_numpy() # libE really needs to receive the *entire* initial batch from a threaded gen + + def _suggest_and_send(self): + """Loop over generator's outbox contents, send to manager""" + while not self.gen._running_gen_f.outbox.empty(): # recv/send any outstanding messages + points = self.gen.suggest_numpy() + if callable(getattr(self.gen, "suggest_updates", None)): + updates = self.gen.suggest_updates() + else: + updates = None + if updates is not None and len(updates): + self.ps.send(points) + for i in updates: + self.ps.send(i, keep_state=True) # keep_state since an update doesn't imply "new points" + else: + self.ps.send(points) + + def _loop_over_gen(self, *args): + """Cycle between moving all outbound / inbound messages between threaded gen and manager""" + while True: + time.sleep(0.0025) # dont need to ping the gen relentlessly. Let it calculate. 400hz + self._suggest_and_send() + while self.ps.comm.mail_flag(): # receive any new messages from Manager, give all to gen + tag, _, H_in = self.ps.recv() + if tag in [STOP_TAG, PERSIS_STOP]: + self.gen.ingest_numpy(H_in, PERSIS_STOP) + return self.gen._running_gen_f.result() + self.gen.ingest_numpy(H_in) diff --git a/libensemble/utils/specs_checkers.py b/libensemble/utils/specs_checkers.py index cf33d359f7..b8e793fa51 100644 --- a/libensemble/utils/specs_checkers.py +++ b/libensemble/utils/specs_checkers.py @@ -25,28 +25,10 @@ def _check_exit_criteria(values): return values -def _check_output_fields(values): - out_names = [e[0] for e in libE_fields] - if scg(values, "H0") is not None and scg(values, "H0").dtype.names is not None: - out_names += list(scg(values, "H0").dtype.names) - out_names += [e[0] for e in scg(values, "sim_specs").outputs] - if scg(values, "gen_specs"): - out_names += [e[0] for e in scg(values, "gen_specs").outputs] - if scg(values, "alloc_specs"): - out_names += [e[0] for e in scg(values, "alloc_specs").outputs] - - for name in scg(values, "sim_specs").inputs: - assert name in out_names, ( - name + " in sim_specs['in'] is not in sim_specs['out'], " - "gen_specs['out'], alloc_specs['out'], H0, or libE_fields." - ) - - if scg(values, "gen_specs"): - for name in scg(values, "gen_specs").inputs: - assert name in out_names, ( - name + " in gen_specs['in'] is not in sim_specs['out'], " - "gen_specs['out'], alloc_specs['out'], H0, or libE_fields." 
- ) +def _check_set_gen_specs_from_variables(values): + if not len(scg(values, "outputs")): + if scg(values, "generator") and len(scg(values, "generator").gen_specs["out"]): + scs(values, "outputs", scg(values, "generator").gen_specs["out"]) return values diff --git a/libensemble/utils/validators.py b/libensemble/utils/validators.py index e2fec4e13d..2164bf2f40 100644 --- a/libensemble/utils/validators.py +++ b/libensemble/utils/validators.py @@ -11,8 +11,8 @@ _check_exit_criteria, _check_H0, _check_logical_cores, - _check_output_fields, _check_set_calc_dirs_on_input_dir, + _check_set_gen_specs_from_variables, _check_set_workflow_dir, ) @@ -152,13 +152,13 @@ def check_exit_criteria(self): @model_validator(mode="after") -def check_output_fields(self): - return _check_output_fields(self) +def check_H0(self): + return _check_H0(self) @model_validator(mode="after") -def check_H0(self): - return _check_H0(self) +def check_set_gen_specs_from_variables(self): + return _check_set_gen_specs_from_variables(self) @model_validator(mode="after") @@ -168,7 +168,9 @@ def check_provided_ufuncs(self): if self.alloc_specs.alloc_f.__name__ != "give_pregenerated_sim_work": assert hasattr(self.gen_specs, "gen_f"), "Generator function not provided to GenSpecs." - assert isinstance(self.gen_specs.gen_f, Callable), "Generator function is not callable." + assert ( + isinstance(self.gen_specs.gen_f, Callable) if self.gen_specs.gen_f is not None else True + ), "Generator function is not callable." return self diff --git a/libensemble/worker.py b/libensemble/worker.py index 44d5f0ddeb..8113f5d406 100644 --- a/libensemble/worker.py +++ b/libensemble/worker.py @@ -172,8 +172,8 @@ def __init__( self.workerID = workerID self.libE_specs = libE_specs self.stats_fmt = libE_specs.get("stats_fmt", {}) - self.sim_runner = Runner(sim_specs) - self.gen_runner = Runner(gen_specs) + self.sim_runner = Runner.from_specs(sim_specs) + self.gen_runner = Runner.from_specs(gen_specs) self.runners = {EVAL_SIM_TAG: self.sim_runner.run, EVAL_GEN_TAG: self.gen_runner.run} self.calc_iter = {EVAL_SIM_TAG: 0, EVAL_GEN_TAG: 0} Worker._set_executor(self.workerID, self.comm) @@ -262,6 +262,7 @@ def _handle_calc(self, Work: dict, calc_in: npt.NDArray) -> (npt.NDArray, dict, try: logger.debug(f"Starting {enum_desc}: {calc_id}") + out = None calc = self.runners[calc_type] with timer: if self.EnsembleDirectory.use_calc_dirs(calc_type): @@ -285,8 +286,8 @@ def _handle_calc(self, Work: dict, calc_in: npt.NDArray) -> (npt.NDArray, dict, if tag in [STOP_TAG, PERSIS_STOP] and message is MAN_SIGNAL_FINISH: calc_status = MAN_SIGNAL_FINISH - if out: - if len(out) >= 3: # Out, persis_info, calc_status + if out is not None: + if not isinstance(out, np.ndarray) and len(out) >= 3: # Out, persis_info, calc_status calc_status = out[2] return out elif len(out) == 2: # Out, persis_info OR Out, calc_status diff --git a/pyproject.toml b/pyproject.toml index 80535d8cbc..b362eb754e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ authors = [{name = "Jeffrey Larson"}, {name = "Stephen Hudson"}, {name = "Stefan M. Wild"}, {name = "David Bindel"}, {name = "John-Luke Navarro"}] -dependencies = [ "numpy", "psutil", "pydantic", "pyyaml", "tomli"] +dependencies = ["numpy", "psutil", "pyyaml", "tomli", "gest-api @ git+https://github.com/campa-consortium/gest-api@main", "pydantic"] description = "A Python toolkit for coordinating asynchronous and dynamic ensembles of calculations." 
name = "libensemble" @@ -95,7 +95,7 @@ python = ">=3.10,<3.14" pip = ">=24.3.1,<25" setuptools = ">=75.6.0,<76" numpy = ">=1.21,<3" -pydantic = ">=1.10,<3" +pydantic = ">=2.11.7,<3" pyyaml = ">=6.0,<7" tomli = ">=1.2.1,<3" psutil = ">=5.9.4,<7" @@ -105,7 +105,7 @@ clang_osx-arm64 = ">=19.1.2,<20" [tool.black] line-length = 120 -target-version = ['py39', 'py310', 'py311', 'py312', 'py313'] +target-version = ['py310', 'py311', 'py312', 'py313'] force-exclude = ''' ( /( @@ -143,4 +143,4 @@ extend-exclude = ["*.bib", "*.xml", "docs/nitpicky"] disable_error_code = ["import-not-found", "import-untyped"] [dependency-groups] -dev = ["pyenchant", "enchant>=0.0.1,<0.0.2", "flake8-modern-annotations>=1.6.0,<2", "flake8-type-checking>=3.0.0,<4"] +dev = ["pyenchant", "enchant>=0.0.1,<0.0.2", "flake8-modern-annotations>=1.6.0,<2", "flake8-type-checking>=3.0.0,<4", "wat>=0.6.0,<0.7", "globus-compute-sdk>=2.28.0,<3"]