rinikerlab · nmaeder · Dec 10, 2025 · Dec 10, 2025 · Dec 10, 2025 · Dec 11, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,70 +1,30 @@
 ci:
-    autofix_commit_msg: |
-        [pre-commit.ci] auto fixes from pre-commit.com hooks
-        for more information, see https://pre-commit.ci
-    autofix_prs: true
-    autoupdate_branch: ''
-    autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
-    autoupdate_schedule: weekly
-    submodules: false
-
+  autofix_commit_msg: |
+    [pre-commit.ci] auto fixes from pre-commit.com hooks
+    for more information, see https://pre-commit.ci
+  autofix_prs: true
+  autoupdate_branch: ""
+  autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
+  autoupdate_schedule: weekly
+  submodules: false
 
 repos:
-
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
+    rev: v6.0.0
     hooks:
       - id: trailing-whitespace
-        exclude:  docs
       - id: check-added-large-files
-        args: ['--maxkb=100000']
+        args: ["--maxkb=100000"]
       - id: end-of-file-fixer
-        exclude: docs
       - id: check-yaml
         args: ["--unsafe"]
 
-
-  - repo: https://github.com/psf/black
-    rev: 22.3.0
-    hooks:
-      - id: black
-        name: Fixes formatting
-        language_version: python3
-        args: ["--line-length=120"]
-
-
-
-  - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.14.8
     hooks:
-      - id: flake8
-        name: Checks pep8 style
-        args: [
-          "--max-line-length=120",
-          # Ignore imports in init files
-          "--per-file-ignores=
-            */__init__.py:F401,setup.py:E121,*/test_jnbs/props/euler_scripts/*.py:F401 E722 E711 E266,
-            ",
-          # ignore long comments (E501), as long lines are formatted by black
-          # ignore Whitespace before ':' (E203)
-          # ignore lambdas (E731)
-          # ignore Line break occurred before a binary operator (W503)
-          # needed to not remove * imports (for example in _all_blocks.py)
-          "--ignore=E501,E203,E231,E731,W503,F405",
-        ]
-
-  - repo: local
-    hooks:
-      - id: jupyisort
-        name: Sorts ipynb imports
-        entry: jupytext --pipe-fmt ".py" --pipe "isort - --multi-line=3 --trailing-comma --force-grid-wrap=0 --use-parentheses --line-width=99" --sync
-        files: \.ipynb$
-        language: python
-        stages: [pre-push]
-
-      - id: jupyblack
-        name: Fixes ipynb format
-        entry: jupytext --pipe-fmt ".py" --pipe "black - --line-length=120" --sync
-        files: \.ipynb$
-        language: python
-        stages: [pre-push]
+      - id: ruff  # lint hook stays ahead of ruff-format so applied fixes get formatted
+        args: ["--fix"]  # apply the fixes permitted by lint.fixable in pyproject.toml
+        files: ^serenityff/|^tests/
+        exclude: \.ipynb$
+      - id: ruff-format
+        files: ^serenityff/|^tests/
diff --git a/dev/conda-env/test_env.yaml b/dev/conda-env/test_env.yaml
@@ -15,6 +15,7 @@ dependencies:
   - codecov
   - ipython
   - pre-commit
+  - ruff
   # Meta
   - conda-build
   # Science

diff --git a/pyproject.toml b/pyproject.toml
@@ -40,5 +40,19 @@ include-package-data = true
 [project.entry-points."openff.toolkit.plugins.handlers"]
 SerenityFFCharge = "serenityff.charge.utils.serenityff_charge_handler:SerenityFFChargeHandler"
 
-[tool.black]
+[tool.ruff]
 line-length = 120
+
+[tool.ruff.lint]
+fixable = ["I"]
+select = [
+    "E",   # pycodestyle error
+    "F",   # pyflakes
+    "I",   # isort
+    "W",   # pycodestyle warning
+]
+
+[tool.ruff.format]
+quote-style = "double"
+line-ending = "auto"
+indent-style = "space"
diff --git a/serenityff/charge/dataset_preperation/MolMorganDataset.py b/serenityff/charge/dataset_preperation/MolMorganDataset.py
diff --git a/serenityff/charge/dataset_preperation/dummy_dataset/dummy_set.ipynb b/serenityff/charge/dataset_preperation/dummy_dataset/dummy_set.ipynb
@@ -7,8 +7,10 @@
    "outputs": [],
    "source": [
     "import os\n",
+    "\n",
     "from rdkit import Chem\n",
-    "from serenityff.charge.dataset_preperation.MolMorganDataset import MolMorganDataset\n"
+    "\n",
+    "from serenityff.charge.dataset_preperation.MolMorganDataset import MolMorganDataset"
    ]
   },
   {
@@ -26,8 +28,8 @@
     }
    ],
    "source": [
-    "dummy_set1 = MolMorganDataset('./dummyset1.sdf')\n",
-    "dummy_set2 = MolMorganDataset('./dummyset2.sdf')"
+    "dummy_set1 = MolMorganDataset(\"./dummyset1.sdf\")\n",
+    "dummy_set2 = MolMorganDataset(\"./dummyset2.sdf\")"
    ]
   },
   {
@@ -85,7 +87,7 @@
     }
    ],
    "source": [
-    "dummy_set1.missings(dummy_set2, DrawMolecules=True, radius = 0)"
+    "dummy_set1.missings(dummy_set2, DrawMolecules=True, radius=0)"
    ]
   },
   {
@@ -103,8 +105,8 @@
     }
    ],
    "source": [
-    "newset = dummy_set1.add(dummy_set2, NewSetName='combined_sets')\n",
-    "#does automatically newset = MolMorganDataset('./combined_sets.sdf')"
+    "newset = dummy_set1.add(dummy_set2, NewSetName=\"combined_sets\")\n",
+    "# does automatically newset = MolMorganDataset('./combined_sets.sdf')"
    ]
   },
   {

diff --git a/serenityff/charge/dataset_preperation/set_creation.ipynb b/serenityff/charge/dataset_preperation/set_creation.ipynb
@@ -33,16 +33,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from serenityff.charge.dataset_preperation.MolMorganDataset import MolMorganDataset\n",
     "import pandas as pd\n",
-    "\n",
     "from rdkit import Chem\n",
     "\n",
-    "corrected = MolMorganDataset('path/to/corrected.sdf')\n",
-    "leadlike = MolMorganDataset('path/to/leadlike.sdf')\n",
-    "solvents = MolMorganDataset('path/to/solvents.sdf')\n",
-    "qmugs500 = MolMorganDataset('path/to/qmugs500.sdf')\n",
-    "noH500 = MolMorganDataset('path/to/noH500.sdf')"
+    "from serenityff.charge.dataset_preperation.MolMorganDataset import MolMorganDataset\n",
+    "\n",
+    "corrected = MolMorganDataset(\"path/to/corrected.sdf\")\n",
+    "leadlike = MolMorganDataset(\"path/to/leadlike.sdf\")\n",
+    "solvents = MolMorganDataset(\"path/to/solvents.sdf\")\n",
+    "qmugs500 = MolMorganDataset(\"path/to/qmugs500.sdf\")\n",
+    "noH500 = MolMorganDataset(\"path/to/noH500.sdf\")"
    ]
   },
   {
@@ -61,7 +61,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "qreduced500 = qmugs500.reduce(NewSetName = 'qreduced500', cutoff = 5)"
+    "qreduced500 = qmugs500.reduce(NewSetName=\"qreduced500\", cutoff=5)"
    ]
   },
   {
@@ -78,7 +78,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "q500solvents = qreduced500.add(otherset=solvents, NewSetName='q500solvents')"
+    "q500solvents = qreduced500.add(otherset=solvents, NewSetName=\"q500solvents\")"
    ]
   },
   {
@@ -96,8 +96,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "qcorrected = q500solvents.reduce(NewSetName='qcorrected', otherset=corrected)\n",
-    "qleadlike = qcorrected.reduce(NewSetName='leadlike', otherset=leadlike)"
+    "qcorrected = q500solvents.reduce(NewSetName=\"qcorrected\", otherset=corrected)\n",
+    "qleadlike = qcorrected.reduce(NewSetName=\"leadlike\", otherset=leadlike)"
    ]
   },
   {
@@ -114,8 +114,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "qleadreduced = qleadlike.reduce(NewSetName='qleadreduced')\n",
-    "final = qleadreduced.add(NewSetName='final', otherset=solvents)\n"
+    "qleadreduced = qleadlike.reduce(NewSetName=\"qleadreduced\")\n",
+    "final = qleadreduced.add(NewSetName=\"final\", otherset=solvents)"
    ]
   },
   {
@@ -132,7 +132,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "final = MolMorganDataset('path/to/final.sdf')\n",
+    "final = MolMorganDataset(\"path/to/final.sdf\")\n",
     "final.weight_distribution()"
    ]
   },
@@ -168,23 +168,25 @@
     "ID_tot = []\n",
     "set_ID = []\n",
     "wrongs = []\n",
-    "wrongchembls = ['CHEMBL3590587',\n",
-    " 'CHEMBL3590586',\n",
-    " 'CHEMBL3590584',\n",
-    " 'CHEMBL3590585',\n",
-    " 'CHEMBL3617051',\n",
-    " 'CHEMBL3752539'] #got these manually\n",
+    "wrongchembls = [\n",
+    "    \"CHEMBL3590587\",\n",
+    "    \"CHEMBL3590586\",\n",
+    "    \"CHEMBL3590584\",\n",
+    "    \"CHEMBL3590585\",\n",
+    "    \"CHEMBL3617051\",\n",
+    "    \"CHEMBL3752539\",\n",
+    "]  # got these manually\n",
     "\n",
     "for mol in final._mols:\n",
     "    if Chem.MolToSmiles(mol) not in smiles_tot:\n",
     "        smiles_tot.append(Chem.MolToSmiles(mol))\n",
-    "    else: \n",
-    "        print('redundant molecule')\n",
+    "    else:\n",
+    "        print(\"redundant molecule\")\n",
     "\n",
     "for mol in qmugs500._mols:\n",
     "    if Chem.MolToSmiles(mol) not in smiles_qmugs:\n",
     "        smiles_qmugs.append(Chem.MolToSmiles(mol))\n",
-    "        ID_qmugs.append(mol.GetProp('CHEMBL_ID'))\n",
+    "        ID_qmugs.append(mol.GetProp(\"CHEMBL_ID\"))\n",
     "\n",
     "for mol in corrected._mols:\n",
     "    if Chem.MolToSmiles(mol) not in smiles_corrected:\n",
@@ -197,8 +199,8 @@
     "for mol in leadlike._mols:\n",
     "    if Chem.MolToSmiles(mol) not in smiles_leadlike:\n",
     "        smiles_leadlike.append(Chem.MolToSmiles(mol))\n",
-    "        if mol.HasProp('chembl_id'):\n",
-    "            ID_leadlike.append(mol.GetProp('chembl_id'))\n",
+    "        if mol.HasProp(\"chembl_id\"):\n",
+    "            ID_leadlike.append(mol.GetProp(\"chembl_id\"))\n",
     "        else:\n",
     "            ID_leadlike.append(0)\n",
     "\n",
@@ -216,25 +218,25 @@
     "        set_ID.append(3)\n",
     "        ID_tot.append(ID_leadlike[smiles_leadlike.index(sm)])\n",
     "    else:\n",
-    "        print(smiles_tot.index(sm), ' is missing in others')\n",
+    "        print(smiles_tot.index(sm), \" is missing in others\")\n",
     "        set_ID.append(10)\n",
-    "        ID_tot.append('missing')\n",
+    "        ID_tot.append(\"missing\")\n",
     "        wrongs.append(smiles_tot.index(sm))\n",
-    "        \n",
+    "\n",
     "for i, ind in enumerate(wrongs):\n",
     "    if set_ID[ind] == 10:\n",
     "        set_ID[ind] = 3\n",
     "    else:\n",
-    "        print*('mistake', ind)\n",
-    "    if ID_tot[ind] == 'missing':\n",
-    "        ID_tot[ind]= wrongchembls[i]\n",
+    "        print(\"mistake\", ind)  # unexpected: indices in wrongs were appended with set_ID 10\n",
+    "    if ID_tot[ind] == \"missing\":\n",
+    "        ID_tot[ind] = wrongchembls[i]\n",
     "    else:\n",
-    "        print('mistake2', ind)\n",
+    "        print(\"mistake2\", ind)\n",
     "\n",
-    "print(final._num_mol) #make sure that all list are same length and no molecules are missed\n",
+    "print(final._num_mol)  # make sure that all list are same length and no molecules are missed\n",
     "print(len(smiles_tot))\n",
     "print(len(set_ID))\n",
-    "print(len(ID_tot))\n"
+    "print(len(ID_tot))"
    ]
   },
   {
@@ -243,9 +245,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "printdata = {'Smiles': smiles_tot, \"Set_ID\": set_ID, \"CHEMBL_ID\": ID_tot}\n",
+    "printdata = {\"Smiles\": smiles_tot, \"Set_ID\": set_ID, \"CHEMBL_ID\": ID_tot}\n",
     "pls = pd.DataFrame(printdata)\n",
-    "print(len(smiles_tot),len(set_ID), len(ID_tot))"
+    "print(len(smiles_tot), len(set_ID), len(ID_tot))"
    ]
   },
   {
@@ -254,7 +256,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "pls.to_csv('final_smiles.csv', index = True)"
+    "pls.to_csv(\"final_smiles.csv\", index=True)"
    ]
   },
   {
@@ -277,8 +279,9 @@
    "source": [
     "for ind in wrongs:\n",
     "    set_ID.insert(ind, 3)\n",
-    "    ID_tot.insert(ind, )\n",
-    "    "
+    "    ID_tot.insert(  # NOTE(review): list.insert(i, x) needs x; the ID to insert is missing - TODO confirm\n",
+    "        ind,\n",
+    "    )"
    ]
   },
   {
@@ -292,7 +295,7 @@
     "for i, mol in enumerate(qleadlike._mols):\n",
     "    if Chem.MolToSmiles(mol) in wrongsmiles:\n",
     "        qleadlikewrongs.append(i)\n",
-    "        chemblidwrongs.append(qleadlike._mols[i].GetProp('chembl_id'))"
+    "        chemblidwrongs.append(qleadlike._mols[i].GetProp(\"chembl_id\"))"
    ]
   },
   {
@@ -303,7 +306,7 @@
    "source": [
     "Chem.MolToSmiles(leadlike._mols[leadlikewrongs[0]])\n",
     "leadlike._mols[leadlikewrongs[0]]\n",
-    "leadlike._mols[leadlikewrongs[5]].GetProp('chembl_id')"
+    "leadlike._mols[leadlikewrongs[5]].GetProp(\"chembl_id\")"
    ]
   },
   {
@@ -313,7 +316,8 @@
    "outputs": [],
    "source": [
     "from rdkit.Chem.Draw import IPythonConsole\n",
-    "IPythonConsole.molSize = 450,400\n",
+    "\n",
+    "IPythonConsole.molSize = 450, 400\n",
     "IPythonConsole.drawOptions.addAtomIndices = True\n",
     "m = Chem.Mol(leadlike._mols[leadlikewrongs[0]])\n",
     "m.RemoveAllConformers()\n",
@@ -338,7 +342,7 @@
     "leadlikewrongs = []\n",
     "for i, mol in enumerate(leadlike._mols):\n",
     "    try:\n",
-    "        if mol.GetProp('chembl_id') in chemblidwrongs:\n",
+    "        if mol.GetProp(\"chembl_id\") in chemblidwrongs:\n",
     "            leadlikewrongs.append(i)\n",
     "    except:\n",
     "        continue"
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,6 +15,7 @@ dependencies: @@
       - codecov
       - ipython
       - pre-commit
+      - ruff
       # Meta
       - conda-build
       # Science
@@ Expand Down @@