From 897b86b331b40949483e42e6878993d91f9f6ab0 Mon Sep 17 00:00:00 2001
From: "Dr. Suneel Kumar BVS" <suneelkumar.bvs@gmail.com>
Date: Mon, 13 Oct 2025 12:18:07 +0530
Subject: [PATCH 1/2] Enhance advanced RDKit tutorials

---
 ...mical_Format_Conversion_and_Metadata.ipynb | 151 ++++++++++++++++
 5_Conformer_Generation_and_3D_Analysis.ipynb  | 160 +++++++++++++++++
 6_Reaction_Enumeration_and_Scaffolds.ipynb    | 143 +++++++++++++++
 ...ule_Standardization_and_Sanitization.ipynb | 146 +++++++++++++++
 8_QSAR_Modeling_with_Scikit_Learn.ipynb       | 167 ++++++++++++++++++
 9_Visualization_and_Drawing_Options.ipynb     | 138 +++++++++++++++
 README.md                                     |  39 ++--
 7 files changed, 934 insertions(+), 10 deletions(-)
 create mode 100644 10_Chemical_Format_Conversion_and_Metadata.ipynb
 create mode 100644 5_Conformer_Generation_and_3D_Analysis.ipynb
 create mode 100644 6_Reaction_Enumeration_and_Scaffolds.ipynb
 create mode 100644 7_Molecule_Standardization_and_Sanitization.ipynb
 create mode 100644 8_QSAR_Modeling_with_Scikit_Learn.ipynb
 create mode 100644 9_Visualization_and_Drawing_Options.ipynb
diff --git a/10_Chemical_Format_Conversion_and_Metadata.ipynb b/10_Chemical_Format_Conversion_and_Metadata.ipynb
new file mode 100644
index 0000000..5519846
--- /dev/null
+++ b/10_Chemical_Format_Conversion_and_Metadata.ipynb
@@ -0,0 +1,151 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tutorial 10: Chemical Format Conversion and Metadata Handling\n",
+    "\n",
+    "Round-trip molecules between common chemical formats while preserving metadata fields.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Objectives\n",
+    "\n",
+    "- Load structure-data files (SDF) that contain rich metadata.\n",
+    "- Convert the records into pandas DataFrames for analysis.\n",
+    "- Export SMILES and SDF files with selected properties.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "from io import BytesIO, StringIO\n",
+    "import pandas as pd\n",
+    "from rdkit import Chem\n",
+    "from rdkit.Chem import PandasTools\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Read SDF Data\n",
+    "\n",
+    "The snippet below emulates reading from disk by loading a single-record SDF string (benzene with two data fields).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "sdf_block = \"\"\"Benzene\n",
+    "  Mrv2108 07152116512D\n",
+    "\n",
+    "  6  6  0  0  0  0            999 V2000\n",
+    "    1.2990   -0.7500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n",
+    "    0.0000   -1.5000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n",
+    "   -1.2990   -0.7500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n",
+    "   -1.2990    0.7500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n",
+    "    0.0000    1.5000    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n",
+    "    1.2990    0.7500    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n",
+    "  1  2  2  0  0  0  0\n",
+    "  2  3  1  0  0  0  0\n",
+    "  3  4  2  0  0  0  0\n",
+    "  4  5  1  0  0  0  0\n",
+    "  5  6  2  0  0  0  0\n",
+    "  6  1  1  0  0  0  0\n",
+    "M  END\n",
+    ">  <Name>\n",
+    "Benzene\n",
+    "\n",
+    ">  <Source>\n",
+    "Example\n",
+    "\n",
+    "$$$$\n",
+    "\"\"\"  # V2000 is fixed-width: 3-line header (title/program/comment), then a counts line whose bond count matches the 6 bond rows\n",
+    "supplier = Chem.SDMolSupplier()\n",
+    "supplier.SetData(sdf_block, sanitize=True)\n",
+    "molecules = [mol for mol in supplier if mol is not None]\n",
+    "len(molecules)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Convert to a DataFrame\n",
+    "\n",
+    "`PandasTools.LoadSDF` retains all metadata fields, making downstream analytics straightforward.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "sdf_buffer = BytesIO(sdf_block.encode('utf-8'))  # LoadSDF reads via ForwardSDMolSupplier, which only accepts binary-mode streams\n",
+    "df = PandasTools.LoadSDF(sdf_buffer, smilesName='smiles', molColName='ROMol')\n",
+    "df\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Write SMILES and SDF Outputs\n",
+    "\n",
+    "Export the curated data to SMILES or SDF files. String buffers allow inspection without touching disk.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "smiles_buffer = StringIO()\n",
+    "df[['smiles', 'Name']].to_csv(smiles_buffer, sep=' ', index=False)  # PandasTools has no WriteSmi; a .smi file is just 'SMILES name' rows with a header\n",
+    "smiles_buffer.getvalue()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "sdf_output = StringIO()\n",
+    "sdf_writer = Chem.SDWriter(sdf_output)  # SDWriter must be given its file name or file-like object at construction\n",
+    "sdf_writer.SetProps(['Name', 'Source', 'Processed'])  # write only the selected data fields\n",
+    "for mol in molecules:\n",
+    "    mol.SetProp('Processed', 'True')\n",
+    "    sdf_writer.write(mol)\n",
+    "sdf_writer.close()\n",
+    "sdf_output.getvalue().splitlines()[:10]\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/5_Conformer_Generation_and_3D_Analysis.ipynb b/5_Conformer_Generation_and_3D_Analysis.ipynb
new file mode 100644
index 0000000..d55f8c3
--- /dev/null
+++ b/5_Conformer_Generation_and_3D_Analysis.ipynb
@@ -0,0 +1,160 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tutorial 5: 3D Conformer Generation and Analysis\n",
+    "\n",
+    "Learn how to generate three-dimensional conformers for a molecule, optimise their geometry, and compare the resulting ensemble.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Objectives\n",
+    "\n",
+    "- Prepare a molecule with explicit hydrogens so that the force field has the atoms it expects.\n",
+    "- Embed several conformers with the ETKDG algorithm and perform force-field minimisation.\n",
+    "- Analyse conformer energies and pairwise RMS values to identify the most representative structures.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "from rdkit import Chem\n",
+    "from rdkit.Chem import AllChem, Draw\n",
+    "from rdkit.Chem import rdMolAlign\n",
+    "import pandas as pd\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Prepare an Example Molecule\n",
+    "\n",
+    "We will work with ibuprofen, a small drug-like molecule that exhibits several low-energy conformations.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "ibuprofen = Chem.AddHs(Chem.MolFromSmiles('CC(C)Cc1ccc(cc1)[C@@H](C)C(=O)O'))\n",
+    "ibuprofen\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate Conformers with ETKDG\n",
+    "\n",
+    "The experimental torsion knowledge distance geometry (ETKDG) method provides a robust starting point for 3D coordinates.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "params = AllChem.ETKDGv3()\n",
+    "params.randomSeed = 0xF00D\n",
+    "conformer_ids = list(AllChem.EmbedMultipleConfs(ibuprofen, numConfs=10, params=params))\n",
+    "print(f\"Generated {len(conformer_ids)} conformers\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Optimise with a Force Field\n",
+    "\n",
+    "Each conformer is refined with the Universal Force Field (UFF). The final energy (in kcal/mol) helps rank conformers.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "# One call minimises every conformer and returns a (not_converged, energy) pair per conformer, in conformer order.\n",
+    "uff_results = AllChem.UFFOptimizeMoleculeConfs(ibuprofen)\n",
+    "energy_df = pd.DataFrame(\n",
+    "    [(cid, energy, not_converged == 0) for cid, (not_converged, energy) in zip(conformer_ids, uff_results)],\n",
+    "    columns=['conformer_id', 'uff_energy_kcal', 'converged']\n",
+    ")\n",
+    "energy_df.sort_values('uff_energy_kcal').reset_index(drop=True)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compare Conformer Geometries\n",
+    "\n",
+    "The RMS distance matrix quantifies structural differences between conformers. Smaller values indicate similar geometries.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "rms_flat = AllChem.GetConformerRMSMatrix(ibuprofen, prealigned=False)  # flat lower triangle: pair (i, j) with j < i sits at i*(i-1)//2 + j\n",
+    "conf_labels = [f\"conf_{cid}\" for cid in conformer_ids]\n",
+    "rms_df = pd.DataFrame(0.0, index=conf_labels, columns=conf_labels)\n",
+    "for i in range(1, len(conf_labels)):\n",
+    "    for j in range(i):\n",
+    "        rms_df.iloc[i, j] = rms_df.iloc[j, i] = rms_flat[i * (i - 1) // 2 + j]\n",
+    "rms_df.round(3)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Visualise the Lowest-Energy Conformers\n",
+    "\n",
+    "Drawing the lowest-energy conformers helps communicate which geometry the force field prefers.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "ranked = energy_df.sort_values('uff_energy_kcal').head(4)['conformer_id'].tolist()\n",
+    "mols = [Chem.Mol(ibuprofen) for _ in ranked]\n",
+    "for new_conf, cid in zip(mols, ranked):\n",
+    "    new_conf.RemoveAllConformers()\n",
+    "    new_conf.AddConformer(ibuprofen.GetConformer(id=cid), assignId=True)\n",
+    "Draw.MolsToGridImage([Chem.RemoveHs(m) for m in mols], legends=[f\"conf {cid}\" for cid in ranked], molsPerRow=2)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/6_Reaction_Enumeration_and_Scaffolds.ipynb b/6_Reaction_Enumeration_and_Scaffolds.ipynb
new file mode 100644
index 0000000..2e10d92
--- /dev/null
+++ b/6_Reaction_Enumeration_and_Scaffolds.ipynb
@@ -0,0 +1,143 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tutorial 6: Reaction Enumeration and Scaffold Analysis\n",
+    "\n",
+    "Use reaction SMARTS to build small virtual libraries and inspect their core scaffolds.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Objectives\n",
+    "\n",
+    "- Define a simple condensation reaction with RDKit SMARTS notation.\n",
+    "- Enumerate the combinatorial products from small reactant sets.\n",
+    "- Extract Bemis\u2013Murcko scaffolds from the enumerated products to understand their shared cores.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "from rdkit import Chem\n",
+    "from rdkit.Chem import AllChem, Draw\n",
+    "from rdkit.Chem.Scaffolds import MurckoScaffold\n",
+    "import pandas as pd\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define the Reaction Template\n",
+    "\n",
+    "We will model a classic amide formation between an acid chloride and an aniline derivative.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "reaction_smarts = \"[Cl][C:1](=[O:2])[C:3].[NH2:4][c:5]>>[C:3][C:1](=[O:2])[N:4][c:5]\"  # map every atom that must survive; only Cl (and one N-H) leaves\n",
+    "rxn = AllChem.ReactionFromSmarts(reaction_smarts)\n",
+    "rxn\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Enumerate the Products\n",
+    "\n",
+    "Provide small lists of reactants and use `EnumerateLibraryFromReaction` to generate the virtual products.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "acyl_chlorides = [Chem.MolFromSmiles(smi) for smi in ('ClC(=O)C', 'ClC(=O)Cc1ccccc1')]\n",
+    "anilines = [\n",
+    "    Chem.MolFromSmiles(smi) for smi in ('Nc1ccccc1', 'Nc1ccc(O)cc1', 'Nc1ccc(Cl)cc1')\n",
+    "]\n",
+    "product_sets = list(AllChem.EnumerateLibraryFromReaction(rxn, [acyl_chlorides, anilines]))\n",
+    "products = []\n",
+    "for product_set in product_sets:\n",
+    "    product = product_set[0]  # the template defines a single product molecule\n",
+    "    # Reaction products are returned unsanitized; sanitizing sets up ring info and valences for the scaffold step.\n",
+    "    Chem.SanitizeMol(product)\n",
+    "    products.append(product)\n",
+    "product_smiles = [Chem.MolToSmiles(p) for p in products]\n",
+    "pd.DataFrame({'product_smiles': product_smiles})\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Extract Bemis\u2013Murcko Scaffolds\n",
+    "\n",
+    "The Murcko scaffold keeps the ring systems and linkers common to each product, revealing the shared core chemistry.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "scaffold_map = {}\n",
+    "for smiles, mol in zip(product_smiles, products):\n",
+    "    scaffold = MurckoScaffold.GetScaffoldForMol(mol)\n",
+    "    scaffold_map.setdefault(Chem.MolToSmiles(scaffold), set()).add(smiles)\n",
+    "scaffold_df = pd.DataFrame(\n",
+    "    [{'scaffold_smiles': scaffold, 'example_products': sorted(list(examples))} for scaffold, examples in scaffold_map.items()]\n",
+    ")\n",
+    "scaffold_df\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Visualise Unique Scaffolds\n",
+    "\n",
+    "Rendering the scaffolds highlights the dominant ring systems created by the enumeration.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "scaffolds = [Chem.MolFromSmiles(s) for s in scaffold_df['scaffold_smiles']]\n",
+    "Draw.MolsToGridImage(scaffolds, legends=[f\"Scaffold {i+1}\" for i in range(len(scaffolds))])\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/7_Molecule_Standardization_and_Sanitization.ipynb b/7_Molecule_Standardization_and_Sanitization.ipynb
new file mode 100644
index 0000000..dc02f34
--- /dev/null
+++ b/7_Molecule_Standardization_and_Sanitization.ipynb
@@ -0,0 +1,146 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tutorial 7: Molecule Standardization and Sanitization\n",
+    "\n",
+    "Clean up molecular structures with RDKit's standardization utilities before downstream analysis.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Objectives\n",
+    "\n",
+    "- Apply the default cleanup to normalise functional groups.\n",
+    "- Remove counter-ions and neutralise charges with `rdMolStandardize` helpers.\n",
+    "- Compare the impact of standardisation in a summary table.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "from rdkit import Chem\n",
+    "from rdkit.Chem import Draw\n",
+    "from rdkit.Chem.MolStandardize import rdMolStandardize\n",
+    "import pandas as pd\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example Molecules\n",
+    "\n",
+    "A small panel of problematic molecules illustrates common standardisation tasks such as desalting and charge neutralisation.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "raw_smiles = [\n",
+    "    'CC[N+](C)(C)CC.Cl',\n",
+    "    'C[C@H](O)[C@@H](O)C(=O)O',\n",
+    "    '[O-]c1ccc(Cl)cc1',\n",
+    "    'C1=CC=[N+](C=C1)[O-]'\n",
+    "]\n",
+    "raw_molecules = [Chem.MolFromSmiles(s) for s in raw_smiles]\n",
+    "Draw.MolsToGridImage(raw_molecules, legends=[f\"Entry {i+1}\" for i in range(len(raw_molecules))])\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Cleanup and Fragment Handling\n",
+    "\n",
+    "`rdMolStandardize.Cleanup` applies RDKit's built-in normalisation rules. The `LargestFragmentChooser` removes small counter-ions.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "cleanup_params = rdMolStandardize.CleanupParameters()\n",
+    "lfc = rdMolStandardize.LargestFragmentChooser()\n",
+    "cleaned = []\n",
+    "for mol in raw_molecules:\n",
+    "    cleaned_mol = rdMolStandardize.Cleanup(mol, cleanup_params)\n",
+    "    parent = lfc.choose(cleaned_mol)\n",
+    "    cleaned.append(parent)\n",
+    "[Chem.MolToSmiles(mol) for mol in cleaned]\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Neutralise Charges and Generate Parents\n",
+    "\n",
+    "The `Uncharger` neutralises charges where possible, a `MetalDisconnector` breaks covalent bonds to metals, and a final normalisation pass yields the parent forms.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "uncharger = rdMolStandardize.Uncharger()\n",
+    "metal_disconnector = rdMolStandardize.MetalDisconnector()\n",
+    "standardized = []\n",
+    "for mol in cleaned:\n",
+    "    uncharged = uncharger.uncharge(mol)\n",
+    "    disconnected = metal_disconnector.Disconnect(uncharged)\n",
+    "    standardized.append(rdMolStandardize.Normalize(disconnected))  # rdMolStandardize exposes Normalize; there is no StandardizeMol\n",
+    "[Chem.MolToSmiles(mol) for mol in standardized]\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Summary Table\n",
+    "\n",
+    "Compare the raw and standardized results to verify that the transformations preserve the intended chemistry.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "summary = pd.DataFrame({\n",
+    "    'original_smiles': raw_smiles,\n",
+    "    'standardized_smiles': [Chem.MolToSmiles(mol) for mol in standardized]\n",
+    "})\n",
+    "summary\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/8_QSAR_Modeling_with_Scikit_Learn.ipynb b/8_QSAR_Modeling_with_Scikit_Learn.ipynb
new file mode 100644
index 0000000..7388b81
--- /dev/null
+++ b/8_QSAR_Modeling_with_Scikit_Learn.ipynb
@@ -0,0 +1,167 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tutorial 8: QSAR Modeling with scikit-learn\n",
+    "\n",
+    "Build a miniature quantitative structure\u2013activity relationship (QSAR) model using RDKit fingerprints and scikit-learn.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Objectives\n",
+    "\n",
+    "- Assemble a labelled dataset of example molecules.\n",
+    "- Convert the structures into Morgan fingerprints for machine-learning friendly descriptors.\n",
+    "- Train and evaluate a logistic regression classifier with standard metrics.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "from rdkit import Chem, DataStructs\n",
+    "from rdkit.Chem import AllChem\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score\n",
+    "from sklearn.model_selection import train_test_split\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Build a Toy Dataset\n",
+    "\n",
+    "The toy dataset below labels aromatic molecules and those containing chlorine or sulfur as active (1); the remaining aliphatic oxygen- and nitrogen-containing molecules are inactive (0).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "dataset = pd.DataFrame({\n",
+    "    'smiles': ['CCO', 'CCN', 'CCCl', 'c1ccccc1', 'CC(=O)O', 'CCS', 'c1ccncc1', 'CCOCC', 'c1ccc(Cl)cc1', 'CC(=O)Nc1ccccc1'],\n",
+    "    'active': [0, 0, 1, 1, 0, 1, 1, 0, 1, 1]\n",
+    "})\n",
+    "dataset\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Generate Morgan Fingerprints\n",
+    "\n",
+    "Morgan fingerprints are circular fingerprints that summarise the neighbourhood around each atom.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "radius = 2\n",
+    "n_bits = 1024\n",
+    "fps = []\n",
+    "for smiles in dataset['smiles']:\n",
+    "    mol = Chem.MolFromSmiles(smiles)\n",
+    "    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)\n",
+    "    arr = np.zeros((1,), dtype=int)\n",
+    "    DataStructs.ConvertToNumpyArray(fp, arr)\n",
+    "    fps.append(arr)\n",
+    "X = np.array(fps)\n",
+    "y = dataset['active'].values\n",
+    "X.shape\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Train/Test Split and Model Training\n",
+    "\n",
+    "We keep 30% of the data for validation and fit a logistic regression classifier on the remaining molecules.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)\n",
+    "model = LogisticRegression(max_iter=2000)\n",
+    "model.fit(X_train, y_train)\n",
+    "y_prob = model.predict_proba(X_test)[:, 1]\n",
+    "y_pred = model.predict(X_test)\n",
+    "{\n",
+    "    'roc_auc': roc_auc_score(y_test, y_prob),\n",
+    "    'accuracy': accuracy_score(y_test, y_pred)\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Inspect the Confusion Matrix\n",
+    "\n",
+    "A quick look at the confusion matrix reveals how the classifier performs on the hold-out set.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "cm = confusion_matrix(y_test, y_pred)\n",
+    "pd.DataFrame(cm, index=['actual_inactive', 'actual_active'], columns=['pred_inactive', 'pred_active'])\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Highlight Influential Bits\n",
+    "\n",
+    "The absolute coefficient magnitudes correspond to fingerprint bits that the model finds important.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "coeffs = pd.Series(model.coef_[0])\n",
+    "coeffs.abs().sort_values(ascending=False).head(10)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/9_Visualization_and_Drawing_Options.ipynb b/9_Visualization_and_Drawing_Options.ipynb
new file mode 100644
index 0000000..630bfc0
--- /dev/null
+++ b/9_Visualization_and_Drawing_Options.ipynb
@@ -0,0 +1,138 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Tutorial 9: Visualisation and Drawing Options\n",
+    "\n",
+    "Explore common molecule visualisation utilities that ship with RDKit.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Objectives\n",
+    "\n",
+    "- Generate publication-ready 2D depictions.\n",
+    "- Lay out collections of molecules with grid images and highlights.\n",
+    "- Produce similarity maps to rationalise activity trends.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "from rdkit import Chem\n",
+    "from rdkit.Chem import AllChem, Draw\n",
+    "from rdkit.Chem.Draw import SimilarityMaps\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Basic Depiction\n",
+    "\n",
+    "RDKit draws 2D depictions directly in notebooks. Coordinate generation ensures consistent layouts.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "aspirin = Chem.MolFromSmiles('CC(=O)Oc1ccccc1C(=O)O')\n",
+    "AllChem.Compute2DCoords(aspirin)\n",
+    "Draw.MolToImage(aspirin, size=(250, 200), legend='Aspirin')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Grid Images with Highlights\n",
+    "\n",
+    "Combine multiple molecules into a single figure and annotate matching substructures.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "series = [\n",
+    "    Chem.MolFromSmiles('c1ccncc1'),\n",
+    "    Chem.MolFromSmiles('c1ccccc1O'),\n",
+    "    Chem.MolFromSmiles('c1ccc(Cl)cc1'),\n",
+    "    Chem.MolFromSmiles('c1ccc(N)cc1')\n",
+    "]\n",
+    "patt = Chem.MolFromSmarts('c1ccccc1')\n",
+    "matches = [mol.GetSubstructMatch(patt) for mol in series]\n",
+    "Draw.MolsToGridImage(series, molsPerRow=2, highlightAtomLists=matches, legends=[f\"Mol {i+1}\" for i in range(len(series))])\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Similarity Maps\n",
+    "\n",
+    "Similarity maps colour each atom according to its contribution to a fingerprint-based similarity score.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "ref = Chem.MolFromSmiles('CCOc1ccccc1O')\n",
+    "probe = Chem.MolFromSmiles('CCOc1ccc(Cl)cc1O')\n",
+    "AllChem.Compute2DCoords(ref)\n",
+    "AllChem.Compute2DCoords(probe)\n",
+    "SimilarityMaps.GetSimilarityMapForFingerprint(ref, probe, SimilarityMaps.GetMorganFingerprint)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Export to SVG\n",
+    "\n",
+    "Use the vector backends to create scalable artwork for documentation or presentations.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "drawer = Draw.MolDraw2DSVG(300, 200)\n",
+    "drawer.DrawMolecule(aspirin)\n",
+    "drawer.FinishDrawing()\n",
+    "svg = drawer.GetDrawingText()\n",
+    "svg[:200] + '...'\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index dba5a04..e21c935 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,14 @@
 # RDKit Tutorials
 
-This repository contains a collection of Jupyter notebooks that demonstrate common cheminformatics workflows using [RDKit](https://www.rdkit.org/). The notebooks walk through reading and writing SMILES, calculating molecular properties, generating fingerprints, and performing substructure or similarity searches.
+A curated collection of Jupyter notebooks showcasing practical cheminformatics workflows with [RDKit](https://www.rdkit.org/). The tutorials start with the fundamentals of reading and writing molecules and progress to advanced topics such as reaction enumeration, conformer analysis, and building QSAR models.
+
+## Table of Contents
+- [Prerequisites](#prerequisites)
+- [Environment Setup](#environment-setup)
+- [Running the Notebooks](#running-the-notebooks)
+- [Tutorial Catalog](#tutorial-catalog)
+- [Data](#data)
+- [Additional Resources](#additional-resources)
 
 ## Prerequisites
 
@@ -15,9 +23,9 @@ This repository contains a collection of Jupyter notebooks that demonstrate comm
    cd rdkit_tutorials
    ```
 
-2. **Create the Conda environment.** The repository includes an environment specification at `environment.yml`. Create the environment with:
+2. **Create the Conda environment.** The repository includes an environment specification at `rdkit.yml`.
    ```bash
-   conda env create -f environment.yml
+   conda env create -f rdkit.yml
    ```
 
 3. **Verify the installation (optional).**
@@ -49,20 +57,31 @@ or
 jupyter lab
 ```
 
-Open any of the notebooks in the repository to explore the tutorials:
+Open any of the notebooks below to explore the tutorials. Each notebook is self-contained and includes explanatory markdown cells and runnable code cells.
+
+## Tutorial Catalog
 
-- `1_Reading and Writing Smiles using rdKit.ipynb`
-- `2_Property calculation, Drug-like filters, and Similarity maps.ipynb`
-- `3_Fingerprint Generation and Similarity Analysis.ipynb`
-- `4_Substructure and Similarity Search using rdKit.ipynb`
+| # | Notebook | Topic Overview |
+| --- | --- | --- |
+| 1 | `1_Reading and Writing Smiles using rdKit.ipynb` | Read molecules from different formats, create SMILES, and save structures. |
+| 2 | `2_Property calculation, Drug-like filters, and Similarity maps.ipynb` | Calculate physicochemical properties, apply drug-likeness filters, and render similarity maps. |
+| 3 | `3_Fingerprint Generation and Similarity Analysis.ipynb` | Generate fingerprints and compute similarity metrics for compound collections. |
+| 4 | `4_Substructure and Similarity Search using rdKit.ipynb` | Perform substructure and similarity searches across chemical libraries. |
+| 5 | `5_Conformer_Generation_and_3D_Analysis.ipynb` | Embed 3D conformers, optimize geometries, and compare conformational ensembles. |
+| 6 | `6_Reaction_Enumeration_and_Scaffolds.ipynb` | Enumerate products with reaction SMARTS and analyse Bemis–Murcko scaffolds. |
+| 7 | `7_Molecule_Standardization_and_Sanitization.ipynb` | Clean molecules, neutralize charges, and extract parent fragments. |
+| 8 | `8_QSAR_Modeling_with_Scikit_Learn.ipynb` | Build a toy QSAR classifier from Morgan fingerprints using scikit-learn. |
+| 9 | `9_Visualization_and_Drawing_Options.ipynb` | Customise 2D depictions, grid images, and similarity maps. |
+| 10 | `10_Chemical_Format_Conversion_and_Metadata.ipynb` | Convert between chemical formats and preserve metadata fields. |
 
 ## Data
 
-Supporting data files referenced by the notebooks are located in the `data/` directory. Ensure you keep the directory structure intact so that relative paths in the notebooks continue to work.
+Supporting data files referenced by the notebooks are located in the `data/` directory. Keep the directory structure intact so that relative paths inside the notebooks continue to work.
 
 ## Additional Resources
 
 - [RDKit Documentation](https://www.rdkit.org/docs/)
 - [RDKit Cookbook](https://www.rdkit.org/docs/Cookbook.html)
+- [RDKit UGM Presentations](https://www.rdkit.org/UGM/) for in-depth talks and slides.
 
-Feel free to open issues or submit pull requests if you find problems or have suggestions for improvements.
+Contributions are welcome! Open an issue or submit a pull request if you have ideas for improvements or spot any problems.

From ed5ad52440c2012a5e2b3da8ea2832662ad005aa Mon Sep 17 00:00:00 2001
From: "Dr. Suneel Kumar BVS" <suneelkumar.bvs@gmail.com>
Date: Mon, 13 Oct 2025 12:23:28 +0530
Subject: [PATCH 2/2] Update 8_QSAR_Modeling_with_Scikit_Learn.ipynb

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 8_QSAR_Modeling_with_Scikit_Learn.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/8_QSAR_Modeling_with_Scikit_Learn.ipynb b/8_QSAR_Modeling_with_Scikit_Learn.ipynb
index 7388b81..a19720d 100644
--- a/8_QSAR_Modeling_with_Scikit_Learn.ipynb
+++ b/8_QSAR_Modeling_with_Scikit_Learn.ipynb
@@ -78,7 +78,7 @@
     "for smiles in dataset['smiles']:\n",
     "    mol = Chem.MolFromSmiles(smiles)\n",
     "    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)\n",
-    "    arr = np.zeros((1,), dtype=int)\n",
+    "    arr = np.zeros((n_bits,), dtype=int)\n",
     "    DataStructs.ConvertToNumpyArray(fp, arr)\n",
     "    fps.append(arr)\n",
     "X = np.array(fps)\n",