diff --git a/examples/tutorials/workgraphs/eos_fine_tuning.ipynb b/examples/tutorials/workgraphs/eos_fine_tuning.ipynb new file mode 100644 index 00000000..2dc1467d --- /dev/null +++ b/examples/tutorials/workgraphs/eos_fine_tuning.ipynb @@ -0,0 +1,465 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "75afa95b", + "metadata": {}, + "source": [ + "# Equation of State Fine-tuning" + ] + }, + { + "cell_type": "markdown", + "id": "e5c9a2fb", + "metadata": {}, + "source": [ + "## Aim\n", + "\n", + "This notebook demonstrates how we can connect and execute tasks with an example fine-tuning workflow, using structures from an equation of state calculation. As in descriptors_filter_qe.ipynb, reference energies and forces are calculated using `Quantum Espresso` on an external computer, but we more directly pass this `StructureData`, as we do not filter the structures to be used in fine-tuning." + ] + }, + { + "cell_type": "markdown", + "id": "3ae9b9ce", + "metadata": {}, + "source": [ + "### Setup\n", + "\n", + "For this tutorial we will assume you have: \n", + "\n", + "\n", + "The initial setup is very similar to the other tutorials, such as ../calculations/singlepoint.ipynb, which goes into more detail about what each step is doing" + ] + }, + { + "cell_type": "markdown", + "id": "83b3068d", + "metadata": {}, + "source": [ + "Load the aiida profile, model and code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "233a4da4", + "metadata": {}, + "outputs": [], + "source": [ + "from aiida import load_profile \n", + "load_profile()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3ca17e8", + "metadata": {}, + "outputs": [], + "source": [ + "from aiida.orm import load_code\n", + "from aiida_mlip.data.model import ModelData\n", + "from ase.build import bulk\n", + "\n", + "uri = \"https://github.com/stfc/janus-core/raw/main/tests/models/mace_mp_small.model\"\n", + "model = ModelData.from_uri(uri, 
architecture=\"mace_mp\")\n", + "\n", + "janus_code = load_code(\"janus@localhost\")\n", + "qe_code = load_code(\"qe@scarf\")\n", + "\n", + "initial_atoms = bulk(\"NaCl\", \"rocksalt\", 5.63)" + ] + }, + { + "cell_type": "markdown", + "id": "2e85ae56", + "metadata": {}, + "source": [ + "First, we set up the scaling task. It takes a `StructureData` object as input and produces atoms in fractional coordinates. The number of generated atomic structures is determined by the `num_structs` parameter. The task returns a dictionary containing these structures." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c433338", + "metadata": {}, + "outputs": [], + "source": [ + "from aiida_workgraph import WorkGraph, task\n", + "import numpy as np\n", + "from aiida.orm import Int, Float, List, Dict, SinglefileData,InstalledCode, KpointsData, StructureData, load_group\n", + "\n", + "from random import shuffle\n", + "\n", + "@task.calcfunction(outputs=[\"scaled_structures\"])\n", + "def create_scales(\n", + " min_v: Float,\n", + " max_v: Float,\n", + " num_structs: Int,\n", + " structure: StructureData\n", + "): \n", + " lattice_scalars = np.cbrt(np.linspace(min_v.value, max_v.value, num_structs.value))\n", + " scaled_structures = {}\n", + "\n", + " atom = structure.get_ase()\n", + " cell = atom.get_cell()\n", + "\n", + " for i, scalars in enumerate(lattice_scalars):\n", + " scaled_atom = atom.copy()\n", + " scaled_atom.set_cell(cell * scalars, scale_atoms=True)\n", + " struct_data = f\"struct{i}\"\n", + " scaled_structures[struct_data] = StructureData(ase=scaled_atom)\n", + "\n", + "\n", + " return {\n", + " \"scaled_structures\": scaled_structures\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "8806101d", + "metadata": {}, + "source": [ + "Before setting up the work graph, we first configure the `Quantum Espresso (QE)` task by defining the code and input parameters. 
from aiida_quantumespresso.calculations.pw import PwCalculation
from aiida_workgraph.manager import get_current_graph

@task.graph(outputs=["structures"])
def qe(
    code: InstalledCode,
    kpoints_mesh: List,
    task_metadata: Dict,
    **scaled_structures,
):
    """Run one SCF ``PwCalculation`` per input structure.

    A ``PwCalculation`` task is added dynamically to the current graph for
    every entry in ``scaled_structures``.  Pseudopotentials and recommended
    cutoffs are taken from the SSSP efficiency family.

    Parameters
    ----------
    code : InstalledCode
        The Quantum ESPRESSO ``pw.x`` code to run.
    kpoints_mesh : List
        K-points mesh, e.g. ``[1, 1, 1]``.
    task_metadata : Dict
        Scheduler/metadata options forwarded to each ``PwCalculation``.
    scaled_structures
        Keyword arguments mapping labels to ``StructureData`` inputs.

    Returns
    -------
    dict
        ``{"structures": {...}}`` where each entry holds the task's
        ``trajectory`` and ``parameters`` output sockets.
    """

    # Handle to the graph currently being built, so tasks can be added
    # dynamically (one per structure).
    wg = get_current_graph()

    kpoints = KpointsData()
    kpoints.set_kpoints_mesh(kpoints_mesh)

    # NOTE(review): pseudo family is hard-coded; the group must already be
    # installed (e.g. via `aiida-pseudo install sssp`).
    pseudo_family = load_group('SSSP/1.3/PBE/efficiency')
    
    output_structures = {}

    for i, structs in scaled_structures.items():
        
        structure = StructureData(ase=structs.get_ase())
        pseudos = pseudo_family.get_pseudos(structure=structure)

        # Recommended wavefunction / density cutoffs for this composition.
        ecutwfc, ecutrho = pseudo_family.get_recommended_cutoffs(
            structure=structure,
            unit='Ry',
        )

        # SCF run, with forces and stress printed so they can be harvested
        # for fine-tuning later.
        pw_params = {
            "CONTROL": {
                "calculation": "scf",
                'tprnfor': True,
                'tstress': True,
            },
            "SYSTEM": {
                "ecutwfc": ecutwfc,
                "ecutrho": ecutrho,
            },
        }
        
        qe_task = wg.add_task(
            PwCalculation,
            code=code,
            parameters=pw_params,
            kpoints=kpoints,
            pseudos=pseudos,
            # .value unwraps the aiida Dict into a plain dict for metadata.
            metadata=task_metadata.value,
            structure=structure,
        )

        # NOTE(review): `i` is already a key like "struct0", so this yields
        # keys like "structstruct0" — harmless (downstream only iterates the
        # keys) but confusing; confirm before renaming.
        output_structures[f"struct{i}"] = {
            "trajectory":qe_task.outputs.output_trajectory,
            "parameters": qe_task.outputs.output_parameters
        }
    
    # Store the collected output sockets in the graph context so they can be
    # returned as a single dynamic namespace.
    wg.update_ctx({
        "structures": output_structures
    })

    return {
        "structures": wg.ctx.structures,
    }
from pathlib import Path
from ase.io import iread, write
from ase import units
from tempfile import NamedTemporaryFile

@task.calcfunction(outputs=["test_file", "train_file", "valid_file"])
def create_train_files(**structures):
    """Split QE-labelled structures into train/valid/test extxyz files.

    Each entry of ``structures`` holds the ``trajectory`` and ``parameters``
    outputs of one ``PwCalculation``.  The structures are shuffled, split
    70/20/10 into train/valid/test, the QE energy, forces and stress are
    attached to each ASE Atoms object, and each split is written to an
    extxyz file.

    Returns
    -------
    dict
        ``SinglefileData`` objects under ``test_file``, ``train_file`` and
        ``valid_file``.
    """
    labels = list(structures.keys())
    # NOTE(review): unseeded shuffle — the split is not reproducible between
    # runs; seed the RNG if reproducibility is required.
    shuffle(labels)

    n = len(labels)
    i1 = int(n * 0.7)
    i2 = int(n * 0.9)

    # Bug fix: the training set must receive the largest share (70%);
    # previously the "test" and "train" assignments were swapped.
    training_split = {
        "train": labels[:i1],
        "valid": labels[i1:i2],
        "test": labels[i2:],
    }

    files = {}

    for split, split_labels in training_split.items():

        with NamedTemporaryFile(suffix=f"{split}.extxyz") as tmp:

            for label in split_labels:

                trajectory = structures[label]["trajectory"]
                atoms = trajectory.get_structure(index=0).get_ase()

                # QE stress comes out in GPa; ase's units.GPa converts it to
                # ASE's native eV/Ang^3.
                atoms.info["qe_stress"] = trajectory.arrays["stress"][0] * units.GPa
                # Bug fix: unit labels were mis-cased ("ev/Ang").
                atoms.info["units"] = {"energy": "eV", "forces": "eV/Ang", "stress": "eV/Ang^3"}
                atoms.set_array("qe_forces", trajectory.arrays["forces"][0])

                atoms.info["qe_energy"] = structures[label]["parameters"].get_dict()["energy"]

                write(Path(tmp.name), atoms, append=True)

            # NOTE(review): the file was written via its *path* while the
            # handle is open; reading the handle works on POSIX — confirm on
            # other platforms.
            files[f"{split}_file"] = SinglefileData(tmp)

    for filename, file in files.items():
        with file.as_path() as path:
            num_structs = sum(1 for _ in iread(path))
            # Bug fix: report the split name instead of the literal "(unknown)".
            print(f"{filename} has {num_structs} structures")

    return {
        "test_file": files["test_file"],
        "train_file": files["train_file"],
        "valid_file": files["valid_file"],
    }
# QE task inputs — adapt resources, queue and module loads to your cluster.
qe_inputs = {
    "task_metadata": Dict({
        "options": {
            "resources": {
                "num_machines": 1,
                "num_mpiprocs_per_machine": 32,
            },
            "max_wallclock_seconds": 3600,
            "queue_name": "scarf",
            "qos": "scarf",
            "environment_variables": {},
            "withmpi": True,
            "prepend_text": """
            module purge
            module use /work4/scd/scarf562/eb-common/modules/all
            module load amd-modules
            module load QuantumESPRESSO/7.2-foss-2023a
            """,
            "append_text": "",
        },
    }),
    "kpoints_mesh": List([1, 1, 1]),
    "code": qe_code,
}

# Bug fix: removed duplicate `from ase.build import bulk` — it is already
# imported in the setup cell at the top of the notebook.

# Assemble the workflow: scale the structure, label every scaled structure
# with QE, then split the labelled structures into training files.
with WorkGraph("EOS_workflow") as wg:

    initial_structure = StructureData(ase=initial_atoms)

    scales_task = wg.add_task(
        create_scales,
        min_v=0.95,
        max_v=1.05,
        num_structs=15,
        structure=initial_structure,
    )

    qe_task = wg.add_task(
        qe,
        **qe_inputs,
        scaled_structures=scales_task.outputs.scaled_structures,
    )

    train_task = wg.add_task(
        create_train_files,
        structures=qe_task.outputs.structures,
    )

# Visualise the graph (rich display in Jupyter) ...
wg

# ... and execute it, blocking until completion.
wg.run()
# Collect the three SinglefileData outputs of the training-file task.
output_files = {
    "test_file": wg.tasks.create_train_files.outputs.test_file.value,
    "train_file": wg.tasks.create_train_files.outputs.train_file.value,
    "valid_file": wg.tasks.create_train_files.outputs.valid_file.value,
}

from ase.io import iread
import matplotlib.pyplot as plt

energies = {}
volumes = {}
for key, file in output_files.items():
    with file.as_path() as path:
        # Single pass over each file (previously every file was read twice,
        # once for energies and once for volumes).
        structs = list(iread(path, index=":"))
    energies[key] = np.array([struct.info["qe_energy"] for struct in structs])
    volumes[key] = np.array([struct.get_volume() for struct in structs])

# Bug fix: removed unused `lens = np.cumsum(...)` dead code.

# Energy-volume scatter, one series per split.
fig, ax = plt.subplots(figsize=(12, 5))
for (key, energy), volume in zip(energies.items(), volumes.values(), strict=True):
    ax.scatter(volume, energy, label=key.capitalize())

ax.set_ylabel("Energy")
ax.set_xlabel("Volume")
ax.legend()

plt.show()