78 changes: 43 additions & 35 deletions README.md
@@ -103,24 +103,23 @@ Organizing the data as a knowledge graph allows a chatbot to access accurate, fa
### Quick Start

#### Use TigerGraph Docker-Based Instance
Set your LLM provider API key (`openai` and `gemini` are supported) as the environment variable `LLM_API_KEY`, then use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
Set your OpenAI API key as the environment variable `OPENAI_API_KEY`, then use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
```
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | bash
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | sh
```

The GraphRAG instances will be deployed in the `./graphrag` folder, and the TigerGraph instance will be available at `http://localhost:14240`.
To change the installation folder or LLM provider, use `bash -s -- <graphrag_folder> <llm_provider>` instead of `bash` at the end of the above command.

> Note: for other LLM providers, manually update `configs/server_config.json` accordingly and re-run `docker compose up -d`.
To change the installation folder, use `sh -s -- <graphrag_folder>` instead of `sh` at the end of the above command.
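For example, a hypothetical run that installs into a custom folder (the `~/graphrag-demo` name below is purely illustrative) would look like:
```
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag.sh | sh -s -- ~/graphrag-demo
```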

#### Use Pre-Installed TigerGraph Instance
Similar to the setup above, use the following command for a one-step quick deployment that connects to a pre-installed TigerGraph instance with default configurations:

Use the following command for a one-step quick deployment with TigerGraph Community Edition and default configurations:
```
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | bash
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | sh
```

The GraphRAG instances will be deployed in the `./graphrag` folder and will connect to the TigerGraph instance at `http://localhost:14240` by default.
To change the installation folder, LLM provider, TigerGraph instance location, or username/password, use `bash -s -- <graphrag_folder> <llm_provider> <tg_host> <tg_port> <tg_username> <tg_password>` instead of `bash` at the end of the above command.
To change the installation folder, TigerGraph instance location, or username/password, use `sh -s -- <graphrag_loc> <tg_host> <tg_port> <tg_username> <tg_password>` instead of `sh` at the end of the above command.
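As a sketch only (the host, username, and password below are placeholders, not documented defaults), a fully customized invocation could look like:
```
curl -k https://raw.githubusercontent.com/tigergraph/graphrag/refs/heads/main/docs/tutorials/setup_graphrag_tg.sh | sh -s -- ./graphrag tg-host.example.com 14240 tigergraph MyPassword123
```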

[Go back to top](#top)

@@ -152,7 +151,7 @@ Here’s what the folder structure looks like:

##### Step 3: Adjust configurations

Edit the `llm_config` section of `configs/server_config.json` and replace `<YOUR_LLM_API_KEY>` with your own API key for the LLM provider.
Edit the `llm_config` section of `configs/server_config.json` and replace `<YOUR_OPENAI_API_KEY>` with your own OpenAI API key.

> If desired, you can also change the models used by the embedding service and the completion service to your preferred models to adjust the output of the LLM service.
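As a minimal sketch (assuming the quick-start's default `./graphrag` install folder and that the Docker Compose file sits at its root), you can verify the placeholder was replaced and restart the services so the change takes effect:
```
cd ./graphrag
grep -n "API_KEY" configs/server_config.json   # confirm the placeholder has been replaced with a real key
docker compose up -d                           # restart the services to pick up the new configuration
```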

@@ -470,23 +469,27 @@ In addition to the `OPENAI_API_KEY`, `llm_model` and `model_name` can be edited
```json
{
"llm_config": {
"authentication_configuration": {
"OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
},
"embedding_service": {
"embedding_model_service": "openai",
"model_name": "text-embedding-3-small",
"authentication_configuration": {
"OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
}
"embedding_model_service": "openai"
},
"completion_service": {
"llm_service": "openai",
"llm_model": "gpt-4.1-mini",
"authentication_configuration": {
"OPENAI_API_KEY": "YOUR_OPENAI_API_KEY_HERE"
},
"model_kwargs": {
"temperature": 0
},
"prompt_path": "./common/prompts/openai_gpt4/"
},
"multimodal_service": {
"llm_service": "openai",
"llm_model": "gpt-4o-mini",
"model_kwargs": {
"temperature": 0
}
}
}
}
@@ -546,7 +549,7 @@ And your JSON config should follow as:
"model_kwargs": {
"temperature": 0
},
"prompt_path": "./common/prompts/gcp_vertexai_palm/"
"prompt_path": "./app/prompts/gcp_vertexai_palm/"
}
}
}
@@ -583,7 +586,7 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
"model_kwargs": {
"temperature": 0
},
"prompt_path": "./common/prompts/azure_open_ai_gpt35_turbo_instruct/"
"prompt_path": "./app/prompts/azure_open_ai_gpt35_turbo_instruct/"
}
}
}
@@ -594,27 +597,32 @@ In addition to the `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`, and `azure_d
```json
{
"llm_config": {
"authentication_configuration": {
"AWS_ACCESS_KEY_ID": "YOUR_AWS_ACCESS_KEY",
"AWS_SECRET_ACCESS_KEY": "YOUR_AWS_SECRET_KEY",
"AWS_REGION_NAME": "us-west-2"
},
"embedding_service": {
"model_name": "amazon.titan-embed-text-v1",
"embedding_model_service": "bedrock",
"model_name":"amazon.titan-embed-text-v2",
"region_name":"us-west-2",
"authentication_configuration": {
"AWS_ACCESS_KEY_ID": "ACCESS_KEY",
"AWS_SECRET_ACCESS_KEY": "SECRET"
}
"dimensions": 1536
},
"completion_service": {
"llm_service": "bedrock",
"llm_model": "us.anthropic.claude-3-7-sonnet-20250219-v1:0",
"region_name":"us-west-2",
"authentication_configuration": {
"AWS_ACCESS_KEY_ID": "ACCESS_KEY",
"AWS_SECRET_ACCESS_KEY": "SECRET"
},
"llm_model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
"model_kwargs": {
"temperature": 0,
"max_tokens": 4096
},
"prompt_path": "./common/prompts/aws_bedrock_claude3haiku/"
"prompt_path": "./common/prompts/openai_gpt4/"
},
"multimodal_service": {
"llm_service": "bedrock",
"llm_model": "anthropic.claude-3-5-sonnet-20240620-v1:0",
"model_kwargs": {
"temperature": 0,
"max_tokens": 4096
}
}
}
}
@@ -640,7 +648,7 @@
"model_kwargs": {
"temperature": 0.0000001
},
"prompt_path": "./common/prompts/openai_gpt4/"
"prompt_path": "./app/prompts/openai_gpt4/"
}
}
}
@@ -670,7 +678,7 @@ Example configuration for a model on Hugging Face with a dedicated endpoint is s
"model_kwargs": {
"temperature": 0.1
},
"prompt_path": "./common/prompts/openai_gpt4/"
"prompt_path": "./app/prompts/openai_gpt4/"
}
}
}
@@ -697,7 +705,7 @@ Example configuration for a model on Hugging Face with a serverless endpoint is
"model_kwargs": {
"temperature": 0.1
},
"prompt_path": "./common/prompts/llama_70b/"
"prompt_path": "./app/prompts/llama_70b/"
}
}
}
@@ -724,7 +732,7 @@
"model_kwargs": {
"temperature": 0.1
},
"prompt_path": "./common/prompts/openai_gpt4/"
"prompt_path": "./app/prompts/openai_gpt4/"
}
}
}
5 changes: 3 additions & 2 deletions common/requirements.txt
@@ -108,9 +108,10 @@ ordered-set==4.1.0
orjson==3.10.18
packaging==24.2
pandas==2.2.3
#pathtools==0.1.2
pathtools==0.1.2
pillow==11.2.1
PyMuPDF==1.26.4
#PyMuPDF==1.26.4
pymupdf4llm==0.2.0
platformdirs==4.3.8
pluggy==1.6.0
prometheus_client==0.22.1
163 changes: 31 additions & 132 deletions common/utils/image_data_extractor.py
@@ -11,155 +11,54 @@

logger = logging.getLogger(__name__)



def describe_image_with_llm(image_input):
def describe_image_with_llm(file_path):
    """
    Send image (pixmap or PIL image) to LLM vision model and return description.
    Uses multimodal_service from config if available, otherwise falls back to completion_service.
    Currently supports: OpenAI, Azure OpenAI, Google GenAI, and Google VertexAI
    Read image file and convert to base64 to send to LLM.
    """
    try:
        from PIL import Image as PILImage

        client = get_multimodal_service()
        if not client:
            return "[Image: Failed to create multimodal LLM client]"


        # Read image and convert to base64
        pil_image = PILImage.open(file_path)
        buffer = io.BytesIO()
        # Convert to RGB if needed for better compatibility
        if image_input.mode != 'RGB':
            image_input = image_input.convert('RGB')
        image_input.save(buffer, format="JPEG", quality=95)
        b64_img = base64.b64encode(buffer.getvalue()).decode("utf-8")
        if pil_image.mode != 'RGB':
            pil_image = pil_image.convert('RGB')
        pil_image.save(buffer, format="JPEG", quality=95)
        image_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        # Build messages (system + human)
        messages = [
            SystemMessage(
                content="You are a helpful assistant that describes images concisely for document analysis."
            ),
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Please describe what you see in this image and "
                            "if the image has scanned text then extract all the text. "
                            "if the image has any logo, icon, or branding element, try to describe it with text. "
                            "Focus on any text, diagrams, charts, or other visual elements."
                            "If the image is purely a logo, icon, or branding element, start your response with 'LOGO:' or 'ICON:'."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{b64_img}"},
                    },
                ]
            ),
            SystemMessage(
                content="You are a helpful assistant that describes images concisely for document analysis."
            ),
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Please describe what you see in this image and "
                            "if the image has scanned text then extract all the text. "
                            "If the image has any graph, chart, table, or other diagram, describe it. "
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"},
                    },
                ],
            ),
        ]

        # Get response from LangChain LLM client
        # Access the underlying LangChain client
        langchain_client = client.llm
        response = langchain_client.invoke(messages)

        return response.content if hasattr(response, 'content') else str(response)
        return response.content if hasattr(response, "content") else str(response)

    except Exception as e:
        logger.error(f"Failed to describe image with LLM: {str(e)}")
        return "[Image: Error processing image description]"


def save_image_and_get_markdown(image_input, context_info="", graphname=None):
    """
    Save image locally to static/images/ folder and return markdown reference with description.

    LEGACY/OLD APPROACH: Used for backward compatibility with JSONL-based loading.
    Images are saved as files and served via /ui/images/ endpoint with img:// protocol.

    For NEW direct loading approach, images are stored in Image vertex as base64
    and served via /ui/image_vertex/ endpoint with image:// protocol.

    Args:
        image_input: PIL Image object
        context_info: Optional context (e.g., "page 3 of invoice.pdf")
        graphname: Graph name to organize images by graph (optional)

    Returns:
        dict with:
        - 'markdown': Markdown string with img:// reference
        - 'image_id': Unique identifier for the saved image
        - 'image_path': Path where image was saved to static/images/
    """
    try:
        # FIRST: Get description from LLM to check if it's a logo
        description = describe_image_with_llm(image_input)

        # Check if the image is a logo, icon, or decorative element BEFORE saving
        # These should be filtered out as they're not content-relevant
        description_lower = description.lower()
        logo_indicators = ['logo', 'icon', 'branding', 'watermark', 'trademark', 'company logo', 'brand logo']

        if any(indicator in description_lower for indicator in logo_indicators):
            logger.info(f"Detected logo/icon in image, skipping: {description[:100]}")
            return None

        # If not a logo, proceed with saving the image
        # Generate unique image ID using hash of image content
        buffer = io.BytesIO()
        if image_input.mode != 'RGB':
            image_input = image_input.convert('RGB')
        image_input.save(buffer, format="JPEG", quality=95)
        image_bytes = buffer.getvalue()

        # Create hash-based ID (deterministic for same image)
        image_hash = hashlib.sha256(image_bytes).hexdigest()[:16]
        image_id = f"{image_hash}.jpg"

        # Save image to local storage directory organized by graphname
        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

        # If graphname is provided, organize images by graph
        if graphname:
            images_dir = os.path.join(project_root, "static", "images", graphname)
            # Include graphname in the image reference for URL construction
            image_reference = f"{graphname}/{image_id}"
        else:
            images_dir = os.path.join(project_root, "static", "images")
            image_reference = image_id

        os.makedirs(images_dir, exist_ok=True)

        image_path = os.path.join(images_dir, image_id)

        # Save image file (skip if already exists with same hash)
        if not os.path.exists(image_path):
            with open(image_path, 'wb') as f:
                f.write(image_bytes)
            logger.info(f"Saved content image to: {image_path}")
        else:
            logger.debug(f"Image already exists: {image_path}")

        # Generate markdown with custom img:// protocol (will be replaced later)
        # Format: ![description](img://graphname/image_id) or ![description](img://image_id)
        markdown = f"![{description}](img://{image_reference})"

        logger.info(f"Created image reference: {image_reference} with description")

        return {
            'markdown': markdown,
            'image_id': image_reference,
            'image_path': image_path,
            'description': description
        }

    except Exception as e:
        logger.error(f"Failed to save image and generate markdown: {str(e)}")
        # Fallback to text description only
        fallback_desc = f"[Image: {context_info} - processing failed]"
        return {
            'markdown': fallback_desc,
            'image_id': None,
            'image_path': None,
            'description': fallback_desc
        }

