Commit 70d3c9d

[apps/hal9] Support multiple file upload (#484)
1 parent 2a5ed64 commit 70d3c9d

3 files changed: +130 -98 lines changed

apps/hal9/app.py

Lines changed: 14 additions & 15 deletions
@@ -1,8 +1,8 @@
-from utils import generate_response, load_messages, insert_message, execute_function, save_messages, insert_tool_message, is_url, download_file, generate_text_embeddings_parquet
+from utils import generate_response, load_messages, insert_message, execute_function, save_messages, insert_tool_message, is_url, is_url_list, process_url
 from tools.calculator import solve_math_problem_description, solve_math_problem
 from tools.generic import answer_generic_question_description, answer_generic_question
 from tools.csv_agent import analyze_csv_description, analyze_csv
-from tools.image_agent import images_management_system, images_management_system_description, add_images_descriptions
+from tools.image_agent import images_management_system, images_management_system_description
 from tools.hal9 import answer_hal9_questions_description, answer_hal9_questions
 from tools.text_agent import analyze_text_file_description, analyze_text_file
 from tools.streamlit import streamlit_generator, streamlit_generator_description
@@ -16,6 +16,7 @@
 
 # load messages
 messages = load_messages()
+print(messages)
 
 # load tools
 tools_descriptions = [python_execution_description, final_response_description, solve_math_problem_description, answer_generic_question_description, analyze_csv_description, images_management_system_description, answer_hal9_questions_description, analyze_text_file_description, fastapi_generator_description, streamlit_generator_description, shiny_generator_description, website_generator_description]
@@ -27,18 +28,15 @@
 for the task. 2. Execute the tool and process its response. 3. If the tool provides a valid result, return it to the user. 4. If the tool fails, do NOT retry with the same tool. Instead,
 explain the failure and suggest improvements in the prompt or alternative approaches.""")
 user_input = input()
-if is_url(user_input):
-    h9.event("Uploaded File", f"{user_input}")
-    filename = user_input.split("/")[-1]
-    file_extension = filename.split(".")[-1] if "." in filename else "No extension"
-    download_file(user_input)
-    messages = insert_message(messages, "system", f"Consider use the file available at path: './.storage/.{filename}' for the following questions.")
-    messages = insert_message(messages, "assistant", f"I'm ready to answer questions about your file: {filename}")
-    if file_extension.lower() == "pdf":
-        generate_text_embeddings_parquet(user_input)
-    if file_extension.lower() in ['jpg', 'jpeg', 'png','webp']:
-        add_images_descriptions(f"./.storage/.{filename}")
-    print(f"I'm ready to answer questions about your file: {filename}")
+print(user_input)
+
+if is_url(user_input) or is_url_list(user_input):
+    if is_url_list(user_input):
+        for url in user_input.split(","):
+            url = url.strip()
+            messages = process_url(url, messages)
+    else:
+        messages = process_url(user_input.strip(), messages)
 else:
     h9.event("User Prompt", f"{user_input}")
     user_input = user_input.replace("\f", "\n")
@@ -57,4 +55,5 @@
     if tool_calls[0].function.name == "final_response":
         break
 if max_steps == steps:
-    print("Unable to generate a satisfactory response on time")
+    print("Unable to generate a satisfactory response on time")
+print("\n\nMensajes finales:\n\n", messages)

apps/hal9/tools/image_agent.py

Lines changed: 1 addition & 45 deletions
@@ -1,40 +1,17 @@
 import shutil
 from replicate import Client
-from utils import generate_response, load_messages, insert_message, execute_function, save_messages, insert_tool_message, load_json_file
+from utils import generate_response, load_messages, insert_message, execute_function, save_messages, insert_tool_message, load_json_file, add_images_descriptions
 from PIL import Image
 from io import BytesIO
 from clients import openai_client
 import os
 import base64
 from mimetypes import guess_type
-import json
 
 replicate = Client(api_token=os.environ['HAL9_TOKEN'], base_url="https://api.hal9.com/proxy/server=https://api.replicate.com")
 
 ########################### Functions ##########################
 
-def add_images_descriptions(image_path):
-    description = generate_description(image_path)
-
-    file_name = './.storage/.images_description.json'
-
-    if os.path.exists(file_name):
-        with open(file_name, 'r') as file:
-            data = json.load(file)
-    else:
-        data = []
-
-    new_record = {
-        "image_path": image_path,
-        "image_description": description
-    }
-
-    data.append(new_record)
-
-    with open(file_name, 'w') as file:
-        json.dump(data, file, indent=4)
-
-    return description
 
 def generate_img_url(image_path):
     mime_type, _ = guess_type(image_path)
@@ -46,27 +23,6 @@ def generate_img_url(image_path):
 
     return f"data:{mime_type};base64,{base64_encoded_data}"
 
-def generate_description(image_path):
-    try:
-        file_input = open(image_path, 'rb')
-        input = {
-            "image": file_input,
-            "prompt": """Generate a detailed image prompt that includes all specific visual details in the image. This should include precise descriptions of colors, textures, lighting, positions of all elements, proportions, background details,
-            foreground details, and any unique stylistic choices. Ensure the description is exhaustive enough to allow an artist or AI to recreate the image accurately without visual reference."""
-        }
-
-        description = ""
-        for event in replicate.stream(
-            "yorickvp/llava-13b:80537f9eead1a5bfa72d5ac6ea6414379be41d4d4f6679fd776e9535d1eb58bb",
-            input=input
-        ):
-            description+=event.data
-        file_input.close()
-    except Exception as e:
-        return (f"Couldn't describe that image. -> Error: {e}")
-
-    return description.replace("{", "").replace("}", "")
-
 def image_generator(prompt, filename):
     try:
         output = replicate.run("black-forest-labs/flux-dev", input={"prompt": prompt})
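The image-description helpers (add_images_descriptions and generate_description) move from this module into utils.py, and image_agent.py now imports add_images_descriptions back from utils. A minimal sketch of the resulting call, with a hypothetical stored-image path:

from utils import add_images_descriptions

# Describes the image via the LLaVA model on Replicate and appends the result
# to ./.storage/.images_description.json
description = add_images_descriptions("./.storage/.photo.png")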

apps/hal9/utils.py

Lines changed: 115 additions & 38 deletions
@@ -1,9 +1,8 @@
 import json
 import os
 import urllib.parse
-import urllib.request
 import requests
-from typing import Literal, List, Dict, Any, Union, Optional
+from typing import Literal, List, Dict, Any, Optional
 from clients import openai_client, groq_client
 from openai import OpenAI
 import fitz
@@ -13,8 +12,9 @@
 import ast
 import re
 import hal9 as h9
+from replicate import Client
 
-# Define the allowed client types.
+# Define the allowed client types.
 ClientType = Literal["openai", "groq"]
 
 def get_client(client_type: ClientType) -> OpenAI:
@@ -262,41 +262,46 @@ def process_chunk(chunk_info):
         "page": page_num + 1  # Page numbers start from 1
     }
 
-def generate_text_embeddings_parquet(url, model="text-embedding-3-small", client_type="openai", n_words=300, overlap=0, max_threads=8):
-    # Download and read the PDF
-    response = requests.get(url)
-    pdf_document = fitz.open(stream=BytesIO(response.content))
-
-    # Prepare chunk info for parallel processing
-    chunk_info_list = []
-    for page_num in range(len(pdf_document)):
-        page = pdf_document[page_num]
-        page_text = page.get_text()
-
-        # Split the page text into chunks
-        text_chunks = split_text(page_text, n_words=n_words, overlap=overlap)
-
-        # Add chunk info to the list
-        for chunk in text_chunks:
-            chunk_info_list.append((chunk, page_num, model, client_type))
-
-    pdf_document.close()
-
-    # Process chunks in parallel
-    rows = []
-    with ThreadPoolExecutor(max_threads) as executor:
-        for result in executor.map(process_chunk, chunk_info_list):
-            rows.append(result)
-
-    # Create the DataFrame
-    df = pd.DataFrame(rows)
-
-    # Add a global chunk ID column
-    df['chunk_id'] = range(len(df))
-    df['filename'] = '.' + url.split("/")[-1]
+def generate_text_embeddings_parquet(
+    url,
+    model="text-embedding-3-small",
+    client_type="openai",
+    n_words=300,
+    overlap=0,
+    max_threads=8,
+    storage_path="./.storage/.text_files.parquet"
+):
+    # Download PDF
+    resp = requests.get(url)
+    doc = fitz.open(stream=BytesIO(resp.content))
+
+    # Prepare chunks
+    tasks = []
+    for i in range(len(doc)):
+        text = doc[i].get_text()
+        for chunk in split_text(text, n_words=n_words, overlap=overlap):
+            tasks.append((chunk, i, model, client_type))
+    doc.close()
+
+    # Process in parallel
+    rows = list(ThreadPoolExecutor(max_threads).map(process_chunk, tasks))
+
+    # Build new DataFrame
+    df_new = pd.DataFrame(rows)
+    df_new['chunk_id'] = range(len(df_new))
+    df_new['filename'] = os.path.basename(url)
+
+    os.makedirs(os.path.dirname(storage_path), exist_ok=True)
+
+    # Load existing and append
+    if os.path.exists(storage_path):
+        df_old = pd.read_parquet(storage_path, engine="pyarrow")
+        df = pd.concat([df_old, df_new], ignore_index=True)
+    else:
+        df = df_new
 
-    # Save as Parquet
-    df.to_parquet("./.storage/.text_files.parquet", engine="pyarrow", index=False)
+    # Save all
+    df.to_parquet(storage_path, engine="pyarrow", index=False)
 
 def load_json_file(json_path):
     if os.path.exists(json_path):
@@ -307,4 +312,76 @@ def load_json_file(json_path):
 def extract_code_block(code: str, language: str) -> str:
     pattern = rf"```{language}\n(.*?)```"
     match = re.search(pattern, code, re.DOTALL)
-    return match.group(1) if match else ""
+    return match.group(1) if match else ""
+
+
+def is_url_list(prompt):
+    urls_list = prompt.split(",")
+    for url in urls_list:
+        result = urllib.parse.urlparse(url.strip())
+        if not all([result.scheme, result.netloc]):
+            return False
+    return True
+
+def add_images_descriptions(image_path):
+    description = generate_description(image_path)
+
+    file_name = './.storage/.images_description.json'
+
+    if os.path.exists(file_name):
+        with open(file_name, 'r') as file:
+            data = json.load(file)
+    else:
+        data = []
+
+    new_record = {
+        "image_path": image_path,
+        "image_description": description
+    }
+
+    data.append(new_record)
+
+    with open(file_name, 'w') as file:
+        json.dump(data, file, indent=4)
+
+    return description
+
+replicate = Client(api_token=os.environ['HAL9_TOKEN'], base_url="https://api.hal9.com/proxy/server=https://api.replicate.com")
+
+def generate_description(image_path):
+    try:
+        file_input = open(image_path, 'rb')
+        input = {
+            "image": file_input,
+            "prompt": """Generate a detailed image prompt that includes all specific visual details in the image. This should include precise descriptions of colors, textures, lighting, positions of all elements, proportions, background details,
+            foreground details, and any unique stylistic choices. Ensure the description is exhaustive enough to allow an artist or AI to recreate the image accurately without visual reference."""
+        }
+
+        description = ""
+        for event in replicate.stream(
+            "yorickvp/llava-13b:80537f9eead1a5bfa72d5ac6ea6414379be41d4d4f6679fd776e9535d1eb58bb",
+            input=input
+        ):
+            description+=event.data
+        file_input.close()
+    except Exception as e:
+        return (f"Couldn't describe that image. -> Error: {e}")
+
+    return description.replace("{", "").replace("}", "")
+
+def process_url(url, messages):
+    h9.event("Uploaded File", f"{url}")
+    filename = url.split("/")[-1]
+    file_extension = filename.split(".")[-1] if "." in filename else "No extension"
+
+    download_file(url)
+    messages = insert_message(messages, "system", f"Consider use the file available at path: './.storage/.{filename}' for the following questions.")
+    messages = insert_message(messages, "assistant", f"I'm ready to answer questions about your file: {filename}")
+
+    if file_extension.lower() == "pdf":
+        generate_text_embeddings_parquet(url)
+    elif file_extension.lower() in ['jpg', 'jpeg', 'png', 'webp']:
+        add_images_descriptions(f"./.storage/.{filename}")
+
+    print(f"I'm ready to answer questions about your file: {filename}")
+    return messages
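For reference, a hedged usage sketch of the helpers added here; the URLs and paths are illustrative assumptions, not values taken from the repository:

# is_url_list treats the prompt as comma-separated entries and requires every
# entry to parse with both a scheme and a netloc
is_url_list("https://example.com/a.pdf, https://example.com/b.png")  # True
is_url_list("summarize my files")                                     # False

# process_url downloads the file into ./.storage/, records system/assistant
# messages pointing at it, and indexes PDFs (parquet embeddings) or images
# (JSON descriptions) before returning the updated message list
messages = process_url("https://example.com/a.pdf", messages)

# generate_text_embeddings_parquet now appends to any existing parquet store at
# storage_path, so embeddings from several uploaded PDFs accumulate in one file
generate_text_embeddings_parquet("https://example.com/a.pdf", storage_path="./.storage/.text_files.parquet")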
