From 9a9ddf5987d10bac535a975d17e4ac8aeac99f03 Mon Sep 17 00:00:00 2001 From: Sizwe Molefe Date: Mon, 20 Oct 2025 18:13:01 +0100 Subject: [PATCH] Added pdfplumber,tools and pillow tools --- .../03_image_rag_using_kb.ipynb | 19 +++++++++---------- .../requirements.txt | 6 ++++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/multimodal-understanding/repeatable-patterns/16-multimodal-agentic-workflow/audio-video-rag/03_image_rag_using_kb.ipynb b/multimodal-understanding/repeatable-patterns/16-multimodal-agentic-workflow/audio-video-rag/03_image_rag_using_kb.ipynb index c7c4b2a5..da198dbb 100644 --- a/multimodal-understanding/repeatable-patterns/16-multimodal-agentic-workflow/audio-video-rag/03_image_rag_using_kb.ipynb +++ b/multimodal-understanding/repeatable-patterns/16-multimodal-agentic-workflow/audio-video-rag/03_image_rag_using_kb.ipynb @@ -162,13 +162,14 @@ "outputs": [], "source": [ "import requests\n", - "import fitz\n", "from PIL import Image\n", + "import pdfplumber\n", "import io\n", "import base64\n", "import boto3\n", "import os\n", "from urllib.parse import urlparse\n", + "import tools\n", "\n", "\n", "\n", @@ -186,21 +187,19 @@ " return None\n", "\n", "def pdf_to_images(pdf_content, quality=75, max_size=(1024, 1024)):\n", - " \"\"\"Convert PDF to list of images\"\"\"\n", + " \"\"\"Convert PDF to list of images using pdfplumber\"\"\"\n", " images = []\n", " try:\n", - " with fitz.open(stream=pdf_content.getvalue(), filetype=\"pdf\") as doc:\n", - " for page_num, page in enumerate(doc):\n", - " # Get page pixmap\n", - " pix = page.get_pixmap(matrix=fitz.Matrix(300/72, 300/72))\n", - " # Convert to PIL Image\n", - " image = Image.frombytes(\"RGB\", [pix.width, pix.height], pix.samples)\n", + " with pdfplumber.open(pdf_content) as pdf:\n", + " for page_num, page in enumerate(pdf.pages):\n", + " # Convert page to image\n", + " img = page.to_image()\n", + " image = img.original\n", " \n", " # Resize if needed\n", " if image.size[0] > max_size[0] or image.size[1] > max_size[1]:\n", " image.thumbnail(max_size, Image.Resampling.LANCZOS)\n", " \n", - " # Convert to bytes\n", " img_byte_arr = io.BytesIO()\n", " image.save(img_byte_arr, format='PNG', optimize=True, quality=quality)\n", " img_byte_arr.seek(0)\n", @@ -213,7 +212,7 @@ " except Exception as e:\n", " print(f\"Error converting PDF to images: {e}\")\n", " return None\n", - "\n", + " \n", "def upload_images_to_s3(images, bucket_name, s3_prefix, pdf_name):\n", " \"\"\"Upload images to S3\"\"\"\n", " s3_client = boto3.client('s3')\n", diff --git a/multimodal-understanding/repeatable-patterns/16-multimodal-agentic-workflow/requirements.txt b/multimodal-understanding/repeatable-patterns/16-multimodal-agentic-workflow/requirements.txt index 43c14dce..4726f213 100644 --- a/multimodal-understanding/repeatable-patterns/16-multimodal-agentic-workflow/requirements.txt +++ b/multimodal-understanding/repeatable-patterns/16-multimodal-agentic-workflow/requirements.txt @@ -28,7 +28,9 @@ sphinx==8.1.3 docutils>=0.20,<0.22 Events fitz -PyMuPDF frontend mkl -pydub \ No newline at end of file +pydub +pdfplumber +pillow +tools \ No newline at end of file