From 5d73216c1ee3bfddc417d77c402911d11edf9d38 Mon Sep 17 00:00:00 2001 From: Kiro Agent <244629292+kiro-agent@users.noreply.github.com> Date: Tue, 27 Jan 2026 17:39:39 +0000 Subject: [PATCH] Add YOLO-based SageMaker training notebook for defect detection - Add DDA_SageMaker_YOLO_Training.ipynb notebook that uses YOLOv8 models - Provides open-source alternative to Lookout for Vision marketplace algorithm - Supports object detection, segmentation, and classification - Includes data preparation, SageMaker training job setup, and model export - Exports models to ONNX and TorchScript for edge deployment - Update README.md to document both training options Co-authored-by: rajjainl <182391521+rajjainl@users.noreply.github.com> --- DDA_SageMaker_YOLO_Training.ipynb | 427 ++++++++++++++++++++++++++++++ README.md | 26 +- 2 files changed, 452 insertions(+), 1 deletion(-) create mode 100644 DDA_SageMaker_YOLO_Training.ipynb diff --git a/DDA_SageMaker_YOLO_Training.ipynb b/DDA_SageMaker_YOLO_Training.ipynb new file mode 100644 index 0000000..f597777 --- /dev/null +++ b/DDA_SageMaker_YOLO_Training.ipynb @@ -0,0 +1,427 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "intro", + "metadata": {}, + "source": [ + "# SageMaker YOLO Training for DDA (Defect Detection Application)\n", + "\n", + "This notebook demonstrates training YOLOv8 models for defect detection using Amazon SageMaker.\n", + "\n", + "**Advantages of YOLO over Lookout for Vision:**\n", + "- Open-source and no marketplace subscription required\n", + "- Supports detection, segmentation, and classification\n", + "- Faster inference speeds\n", + "- More flexible deployment options" + ] + }, + { + "cell_type": "markdown", + "id": "prereq", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "1. Amazon SageMaker Notebook Instance or SageMaker Studio\n", + "2. IAM role with SageMaker and S3 permissions\n", + "3. 
Training images labeled in YOLO format or SageMaker Ground Truth format" + ] + }, + { + "cell_type": "markdown", + "id": "setup_header", + "metadata": {}, + "source": [ + "## Step 1: Environment Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "imports", + "metadata": {}, + "outputs": [], + "source": [ + "# Install required packages\n", + "!pip install -q ultralytics sagemaker boto3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "setup", + "metadata": {}, + "outputs": [], + "source": [ + "import boto3\n", + "import sagemaker\n", + "import json\n", + "import os\n", + "import datetime\n", + "import time\n", + "from sagemaker.pytorch import PyTorch\n", + "\n", + "session = sagemaker.Session()\n", + "region = session.boto_region_name\n", + "bucket = session.default_bucket()\n", + "role = sagemaker.get_execution_role()\n", + "project = \"YOLO-defect-detection\"\n", + "\n", + "print(f\"Region: {region}\")\n", + "print(f\"Bucket: {bucket}\")\n", + "print(f\"Role: {role}\")" + ] + }, + { + "cell_type": "markdown", + "id": "s3_setup_header", + "metadata": {}, + "source": [ + "## Step 2: S3 Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "s3_setup", + "metadata": {}, + "outputs": [], + "source": [ + "# Create S3 folder structure\n", + "s3_client = boto3.client('s3')\n", + "s3_uri = f\"s3://{bucket}/{project}/\"\n", + "\n", + "folders = ['', 'data/', 'data/images/', 'data/labels/', 'output/', 'models/']\n", + "for folder in folders:\n", + " s3_client.put_object(Bucket=bucket, Key=f\"{project}/{folder}\")\n", + "\n", + "print(f\"S3 structure created: {s3_uri}\")" + ] + }, + { + "cell_type": "markdown", + "id": "data_header", + "metadata": {}, + "source": [ + "## Step 3: Prepare Training Data\n", + "\n", + "YOLO expects data in a specific format with images and corresponding label files." 
import os
import random
import shutil
from pathlib import Path

# Fraction of images assigned to the training split; the rest go to validation.
TRAIN_FRACTION = 0.8
# Fixed seed so the train/val split is identical on every run and machine.
SPLIT_SEED = 42


def split_images(images, train_fraction=TRAIN_FRACTION, seed=SPLIT_SEED):
    """Split *images* into reproducible ``(train, val)`` lists.

    The paths are sorted first and then shuffled with a seeded RNG, so the
    result does not depend on the order in which the filesystem listed them
    and both classes are mixed across the two splits instead of being sliced
    in name order.

    Args:
        images: iterable of ``pathlib.Path`` image paths.
        train_fraction: share of images placed in the training list.
        seed: seed for the shuffle.

    Returns:
        A ``(train, val)`` tuple of lists; ``train`` holds
        ``int(len(images) * train_fraction)`` items.
    """
    ordered = sorted(images)
    random.Random(seed).shuffle(ordered)
    cut = int(len(ordered) * train_fraction)
    return ordered[:cut], ordered[cut:]


def write_split(images, dataset_root, split):
    """Copy *images* into ``images/<split>`` and write one label file each.

    The class is taken from the file name (1 if it contains ``anomaly``,
    otherwise 0). Each label file uses the YOLO detection format
    ``class x_center y_center width height`` (normalized) with a box that
    covers the whole image.

    Args:
        images: iterable of ``pathlib.Path`` image paths to copy.
        dataset_root: root directory of the YOLO dataset.
        split: sub-directory name, e.g. ``'train'`` or ``'val'``.
    """
    images_dir = Path(dataset_root) / 'images' / split
    labels_dir = Path(dataset_root) / 'labels' / split
    images_dir.mkdir(parents=True, exist_ok=True)
    labels_dir.mkdir(parents=True, exist_ok=True)

    for img in images:
        shutil.copy(img, images_dir / img.name)
        label = 1 if 'anomaly' in img.name else 0
        # Swap only the final extension; str.replace would also rewrite any
        # other ".jpg" occurring inside the file name.
        label_file = labels_dir / f"{img.stem}.txt"
        label_file.write_text(f"{label} 0.5 0.5 1.0 1.0\n")


# Create YOLO dataset structure and populate it from the sample dataset
yolo_data = Path('yolo_dataset')
src_dir = Path('cookie-dataset/dataset-files/training-images')
images = list(src_dir.glob('*.jpg'))
if not images:
    print(f"WARNING: no .jpg images found in {src_dir}")

# Split 80/20 for train/val
train_imgs, val_imgs = split_images(images)
write_split(train_imgs, yolo_data, 'train')
write_split(val_imgs, yolo_data, 'val')

print(f"Train images: {len(train_imgs)}, Val images: {len(val_imgs)}")
import shutil
from pathlib import Path

from sagemaker.pytorch import PyTorch

# Stage only the files the training container needs in a dedicated directory.
# Pointing source_dir at '.' would package the whole notebook working
# directory (cloned sample dataset, yolo_dataset copy, downloaded model
# archives) into the source tarball uploaded for the job.
source_root = Path('yolo_src')
source_root.mkdir(exist_ok=True)
shutil.copy('train_yolo.py', source_root / 'train_yolo.py')
# A requirements.txt at the root of source_dir is what gets ultralytics and
# the ONNX export packages installed in the training container; writing it
# here makes this cell independent of the order in which later cells are run.
(source_root / 'requirements.txt').write_text(
    'ultralytics>=8.0.0\n'
    'onnx\n'
    'onnxruntime\n'
)

# Define the estimator
yolo_estimator = PyTorch(
    entry_point='train_yolo.py',
    source_dir=str(source_root),
    role=role,
    instance_count=1,
    instance_type='ml.g4dn.xlarge',  # GPU instance
    framework_version='2.0.0',
    py_version='py310',
    output_path=f's3://{bucket}/{project}/output',
    # Passed to train_yolo.py as --epochs / --batch-size / --img-size / --model
    hyperparameters={
        'epochs': 50,
        'batch-size': 16,
        'img-size': 640,
        'model': 'yolov8n.pt'  # nano model for fast training
    },
)

print("Estimator configured")
import tarfile
from urllib.parse import urlparse

# --- Cell: requirements file for the training container ----------------------
with open('requirements.txt', 'w') as f:
    f.write('ultralytics>=8.0.0\n')
    f.write('onnx\n')
    f.write('onnxruntime\n')
print("Created requirements.txt")

# --- Cell: start training job ------------------------------------------------
# Timestamped name keeps repeated runs from colliding on the job name.
job_name = f"yolo-defect-{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
yolo_estimator.fit(
    inputs={'training': training_data_uri},
    job_name=job_name,
    wait=True,
    logs='All'
)
print(f"Training job {job_name} completed!")

# --- Cell: download trained model --------------------------------------------
model_artifact = yolo_estimator.model_data
print(f"Model artifact: {model_artifact}")

# Download and unpack with boto3/tarfile instead of shell commands: a failed
# "!aws s3 cp" or "!tar" does not raise, so the notebook would carry on with a
# missing or partial model directory. These calls raise on failure.
artifact = urlparse(model_artifact)  # s3://<bucket>/<key>
boto3.client('s3').download_file(
    artifact.netloc, artifact.path.lstrip('/'), 'yolo_model.tar.gz'
)
os.makedirs('yolo_model', exist_ok=True)
with tarfile.open('yolo_model.tar.gz', 'r:gz') as archive:
    archive.extractall('yolo_model')
print(sorted(os.listdir('yolo_model')))
# Optional: stage the ONNX model in S3 for SageMaker Neo compilation.
# NOTE: this cell only repackages and uploads the model; it does not start a
# Neo compilation job.
import tarfile

sagemaker_client = boto3.client('sagemaker')  # not used in this cell

# Find ONNX model and repackage it as a single-file archive
onnx_path = 'yolo_model/defect_detection/weights/best.onnx'
if os.path.exists(onnx_path):
    with tarfile.open('yolo_onnx.tar.gz', 'w:gz') as tar:
        tar.add(onnx_path, arcname='model.onnx')

    # Upload for compilation. upload_file raises if the transfer fails, so the
    # message below is only printed after a successful upload (a failing
    # "!aws s3 cp" would not stop the cell).
    onnx_key = f'{project}/models/yolo_onnx.tar.gz'
    onnx_s3 = f's3://{bucket}/{onnx_key}'
    boto3.client('s3').upload_file('yolo_onnx.tar.gz', bucket, onnx_key)
    print(f"ONNX model uploaded to: {onnx_s3}")
else:
    print("ONNX model not found - check training output")
`DDA_SageMaker_YOLO_Training.ipynb` + +- Uses open-source YOLOv8 models (no marketplace subscription required) +- Supports object detection, segmentation, and classification +- Faster inference speeds suitable for real-time edge deployment +- Exports to ONNX and TorchScript for flexible deployment + +### Option 2: Lookout for Vision Algorithm +**Notebook**: `DDA_SageMaker_Model_Training_and_Compilation.ipynb` + +- Uses AWS Marketplace Computer Vision Defect Detection algorithm +- Requires marketplace subscription +- Supports classification and segmentation models +- Compiles for multiple targets (x86, ARM64, Jetson Xavier) + +Both notebooks support the same deployment workflow via Greengrass components. + ## Quick Start @@ -448,7 +470,9 @@ DDA consists of several key components: #### Step 4: Deploy ML Model (Optional) -1. **Train and Compile model using Amazon SageMaker** (see [SageMaker blog guide](https://aws.amazon.com/blogs/machine-learning/)) +1. **Train and Compile model using Amazon SageMaker**: + - **Option A (Lookout for Vision)**: Use `DDA_SageMaker_Model_Training_and_Compilation.ipynb` for AWS Marketplace algorithm + - **Option B (YOLO - Recommended)**: Use `DDA_SageMaker_YOLO_Training.ipynb` for open-source YOLOv8 models 2. **Create model component**: - Use `DDA_Greengrass_Component_Creator.ipynb` notebook