78 changes: 43 additions & 35 deletions README.md
@@ -103,24 +103,23 @@ Organizing the data as a knowledge graph allows a chatbot to access accurate, fa
### Quick Start

#### Use TigerGraph Docker-Based Instance
Set your LLM provider API key (`openai` and `gemini` are supported) as the environment variable `LLM_API_KEY`, then use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
Set your OpenAI API key as the environment variable `OPENAI_API_KEY`, then use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
```
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | bash
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | sh
```

The GraphRAG instances will be deployed in the `./graphrag` folder, and the TigerGraph instance will be available at `http://localhost:14240`.
To change the installation folder or LLM provider, use `bash -s -- <graphrag_folder> <llm_provider>` instead of `bash` at the end of the above command.

> Note: for other LLM providers, manually update `configs/server_config.json` accordingly and re-run `docker compose up -d`.
To change the installation folder, use `sh -s -- <graphrag_folder>` instead of `sh` at the end of the above command.
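For example, a hypothetical run that installs into a custom folder (the `~/graphrag-demo` name below is purely illustrative) would look like:
```
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | sh -s -- ~/graphrag-demo
```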

#### Use Pre-Installed TigerGraph Instance
Similar to the setup above, use the following command for a one-step quick deployment that connects to a pre-installed TigerGraph instance with default configurations:

Use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
```
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | bash
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | sh
```

The GraphRAG instances will be deployed in the `./graphrag` folder and will connect to the TigerGraph instance at `http://localhost:14240` by default.
To change the installation folder, LLM provider, TigerGraph instance location, or username/password, use `bash -s -- <graphrag_folder> <llm_provider> <tg_host> <tg_port> <tg_username> <tg_password>` instead of `bash` at the end of the above command.
To change the installation folder, TigerGraph instance location, or username/password, use `sh -s -- <graphrag_loc> <tg_host> <tg_port> <tg_username> <tg_password>` instead of `sh` at the end of the above command.
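As a sketch only (the host, username, and password below are placeholders, not documented defaults), a fully customized invocation could look like:
```
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | sh -s -- ./graphrag tg-host.example.com 14240 tigergraph MyPassword123
```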

[Go back to top](#top)

@@ -152,7 +151,7 @@ Here’s what the folder structure looks like:

##### Step 3: Adjust configurations

Edit the `llm_config` section of `configs/server_config.json` and replace `<YOUR_LLM_API_KEY>` with your own API key for the LLM provider.
Edit the `llm_config` section of `configs/server_config.json` and replace `<YOUR_OPENAI_API_KEY>` with your own OpenAI API key.

> If desired, you can also change the models used by the embedding service and the completion service to your preferred models to adjust the output of the LLM service.
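As a minimal sketch (assuming the quick-start's default `./graphrag` install folder and that the Docker Compose file sits at its root), you can verify the placeholder was replaced and restart the services so the change takes effect:
```
cd ./graphrag
grep -n "API_KEY" configs/server_config.json   # confirm the placeholder has been replaced with a real key
docker compose up -d                           # restart the services to pick up the new configuration
```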

@@ -470,23 +469,27 @@ In addition to the `OPENAI_API_KEY`, `llm_model` and `model_name` can be edited
```json
{
"llm_config": {
"authentication_configuration": {
"OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
},
"embedding_service": {
"embedding_model_service": "openai",
"model_name": "text-embedding-3-small",
"authentication_configuration": {
"OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
}
"embedding_model_service": "openai"
},
"completion_service": {
"llm_service": "openai",
"llm_model": "gpt-4.1-mini",
"authentication_configuration": {
"OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
},
"model_kwargs": {
"temperature": 0
},
"prompt_path": "./common/prompts/openai_gpt4/"
},
"multimodal_service": {
"llm_service": "openai",
"llm_model": "gpt-4o-mini",
"model_kwargs": {
"temperature": 0
}
}
}
}
@@ -546,7 +549,7 @@ And your JSON config should follow as:
"model_kwargs": {
"temperature": 0
},
"prompt_path": "./common/prompts/gcp_vertexai_palm/"
"prompt_path": "./app/prompts/gcp_vertexai_palm/"
}
}
}
@@ -583,7 +586,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
"model_kwargs": {
"temperature": 0
},
"prompt_path": "./common/prompts/azure_open_ai_gpt35_turbo_instruct/"
"prompt_path": "./app/prompts/azure_open_ai_gpt35_turbo_instruct/"
}
}
}
@@ -594,27 +597,32 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
```json
{
"llm_config": {
"authentication_configuration": {
"AWS_ACCESS_KEY_ID": "YOUR_AWS_ACCESS_KEY",
"AWS_SECRET_ACCESS_KEY": "YOUR_AWS_SECRET_KEY",
"AWS_REGION_NAME": "us-west-2"
},
"embedding_service": {
"model_name": "amazon.titan-embed-text-v1",
"embedding_model_service": "bedrock",
"model_name":"amazon.titan-embed-text-v2",
"region_name":"us-west-2",
"authentication_configuration": {
"AWS_ACCESS_KEY_ID": "ACCESS_KEY",
"AWS_SECRET_ACCESS_KEY": "SECRET"
}
"dimensions": 1536
},
"completion_service": {
"llm_service": "bedrock",
"llm_model": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
"region_name":"us-west-2",
"authentication_configuration": {
"AWS_ACCESS_KEY_ID": "ACCESS_KEY",
"AWS_SECRET_ACCESS_KEY": "SECRET"
},
"llm_model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
"model_kwargs": {
"temperature": 0,
"max_tokens": 4096
},
"prompt_path": "./common/prompts/aws_bedrock_claude3haiku/"
"prompt_path": "./common/prompts/openai_gpt4/"
},
"multimodal_service": {
"llm_service": "bedrock",
"llm_model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
"model_kwargs": {
"temperature": 0,
"max_tokens": 4096
}
}
}
}
@@ -640,7 +648,7 @@
"model_kwargs": {
"temperature": 0.0000001
},
"prompt_path": "./common/prompts/openai_gpt4/"
"prompt_path": "./app/prompts/openai_gpt4/"
}
}
}
@@ -670,7 +678,7 @@ Example configuration for a model on Hugging Face with a dedicated endpoint is s
"model_kwargs": {
"temperature": 0.1
},
"prompt_path": "./common/prompts/openai_gpt4/"
"prompt_path": "./app/prompts/openai_gpt4/"
}
}
}
@@ -697,7 +705,7 @@ Example configuration for a model on Hugging Face with a serverless endpoint is
"model_kwargs": {
"temperature": 0.1
},
"prompt_path": "./common/prompts/llama_70b/"
"prompt_path": "./app/prompts/llama_70b/"
}
}
}
@@ -724,7 +732,7 @@
"model_kwargs": {
"temperature": 0.1
},
"prompt_path": "./common/prompts/openai_gpt4/"
"prompt_path": "./app/prompts/openai_gpt4/"
}
}
}
5 changes: 3 additions & 2 deletions common/requirements.txt
@@ -108,9 +108,10 @@ ordered-set==4.1.0
orjson==3.10.18
packaging==24.2
pandas==2.2.3
#pathtools==0.1.2
pathtools==0.1.2
pillow==11.2.1
PyMuPDF==1.26.4
#PyMuPDF==1.26.4
pymupdf4llm==0.2.0
platformdirs==4.3.8
pluggy==1.6.0
prometheus_client==0.22.1
163 changes: 31 additions & 132 deletions common/utils/image_data_extractor.py
@@ -11,155 +11,54 @@

logger = logging.getLogger(__name__)



def describe_image_with_llm(image_input):
def describe_image_with_llm(file_path):
    """
    Send image (pixmap or PIL image) to LLM vision model and return description.
    Uses multimodal_service from config if available, otherwise falls back to completion_service.
    Currently supports: OpenAI, Azure OpenAI, Google GenAI, and Google VertexAI
    Read image file and convert to base64 to send to LLM.
    """
    try:
        from PIL import Image as PILImage

        client = get_multimodal_service()
        if not client:
            return "[Image: Failed to create multimodal LLM client]"


        # Read image and convert to base64
        pil_image = PILImage.open(file_path)
        buffer = io.BytesIO()
        # Convert to RGB if needed for better compatibility
        if image_input.mode != 'RGB':
            image_input = image_input.convert('RGB')
        image_input.save(buffer, format="JPEG", quality=95)
        b64_img = base64.b64encode(buffer.getvalue()).decode("utf-8")
        if pil_image.mode != 'RGB':
            pil_image = pil_image.convert('RGB')
        pil_image.save(buffer, format="JPEG", quality=95)
        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        # Build messages (system + human)
        messages = [
            SystemMessage(
                content="You are a helpful assistant that describes images concisely for document analysis."
            ),
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Please describe what you see in this image and "
                            "if the image has scanned text then extract all the text. "
                            "if the image has any logo, icon, or branding element, try to describe it with text. "
                            "Focus on any text, diagrams, charts, or other visual elements."
                            "If the image is purely a logo, icon, or branding element, start your response with 'LOGO:' or 'ICON:'."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{b64_img}"},
                    },
                ]
            ),
            SystemMessage(
                content="You are a helpful assistant that describes images concisely for document analysis."
            ),
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Please describe what you see in this image and "
                            "if the image has scanned text then extract all the text. "
                            "If the image has any graph, chart, table, or other diagram, describe it. "
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                    },
                ],
            ),
        ]

        # Get response from LangChain LLM client
        # Access the underlying LangChain client
        langchain_client = client.llm
        response = langchain_client.invoke(messages)

        return response.content if hasattr(response, 'content') else str(response)
        return response.content if hasattr(response, "content") else str(response)

    except Exception as e:
        logger.error(f"Failed to describe image with LLM: {str(e)}")
        return "[Image: Error processing image description]"


def save_image_and_get_markdown(image_input, context_info="", graphname=None):
    """
    Save image locally to static/images/ folder and return markdown reference with description.

    LEGACY/OLD APPROACH: Used for backward compatibility with JSONL-based loading.
    Images are saved as files and served via /ui/images/ endpoint with img:// protocol.

    For NEW direct loading approach, images are stored in Image vertex as base64
    and served via /ui/image_vertex/ endpoint with image:// protocol.

    Args:
        image_input: PIL Image object
        context_info: Optional context (e.g., "page 3 of invoice.pdf")
        graphname: Graph name to organize images by graph (optional)

    Returns:
        dict with:
        - 'markdown': Markdown string with img:// reference
        - 'image_id': Unique identifier for the saved image
        - 'image_path': Path where image was saved to static/images/
    """
    try:
        # FIRST: Get description from LLM to check if it's a logo
        description = describe_image_with_llm(image_input)

        # Check if the image is a logo, icon, or decorative element BEFORE saving
        # These should be filtered out as they're not content-relevant
        description_lower = description.lower()
        logo_indicators = ['logo', 'icon', 'branding', 'watermark', 'trademark', 'company logo', 'brand logo']

        if any(indicator in description_lower for indicator in logo_indicators):
            logger.info(f"Detected logo/icon in image, skipping: {description[:100]}")
            return None

        # If not a logo, proceed with saving the image
        # Generate unique image ID using hash of image content
        buffer = io.BytesIO()
        if image_input.mode != 'RGB':
            image_input = image_input.convert('RGB')
        image_input.save(buffer, format="JPEG", quality=95)
        image_bytes = buffer.getvalue()

        # Create hash-based ID (deterministic for same image)
        image_hash = hashlib.sha256(image_bytes).hexdigest()[:16]
        image_id = f"{image_hash}.jpg"

        # Save image to local storage directory organized by graphname
        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

        # If graphname is provided, organize images by graph
        if graphname:
            images_dir = os.path.join(project_root, "static", "images", graphname)
            # Include graphname in the image reference for URL construction
            image_reference = f"{graphname}/{image_id}"
        else:
            images_dir = os.path.join(project_root, "static", "images")
            image_reference = image_id

        os.makedirs(images_dir, exist_ok=True)

        image_path = os.path.join(images_dir, image_id)

        # Save image file (skip if already exists with same hash)
        if not os.path.exists(image_path):
            with open(image_path, 'wb') as f:
                f.write(image_bytes)
            logger.info(f"Saved content image to: {image_path}")
        else:
            logger.debug(f"Image already exists: {image_path}")

        # Generate markdown with custom img:// protocol (will be replaced later)
        # Format: ![description](img://graphname/image_id) or ![description](img://image_id)
        markdown = f"![{description}](img://{image_reference})"

        logger.info(f"Created image reference: {image_reference} with description")

        return {
            'markdown': markdown,
            'image_id': image_reference,
            'image_path': image_path,
            'description': description
        }

    except Exception as e:
        logger.error(f"Failed to save image and generate markdown: {str(e)}")
        # Fallback to text description only
        fallback_desc = f"[Image: {context_info} - processing failed]"
        return {
            'markdown': fallback_desc,
            'image_id': None,
            'image_path': None,
            'description': fallback_desc
        }

