11import json
22import os
33import urllib .parse
4- import urllib .request
54import requests
6- from typing import Literal , List , Dict , Any , Union , Optional
5+ from typing import Literal , List , Dict , Any , Optional
76from clients import openai_client , groq_client
87from openai import OpenAI
98import fitz
1312import ast
1413import re
1514import hal9 as h9
15+ from replicate import Client
1616
17- # Define the allowed client types.
17+ # Define the allowed client types.
1818ClientType = Literal ["openai" , "groq" ]
1919
2020def get_client (client_type : ClientType ) -> OpenAI :
@@ -262,41 +262,46 @@ def process_chunk(chunk_info):
262262 "page" : page_num + 1 # Page numbers start from 1
263263 }
264264
def generate_text_embeddings_parquet(
    url,
    model="text-embedding-3-small",
    client_type="openai",
    n_words=300,
    overlap=0,
    max_threads=8,
    storage_path="./.storage/.text_files.parquet"
):
    """Download a PDF, embed its text in chunks, and append rows to a Parquet store.

    Each page is split into overlapping word chunks via ``split_text``; every
    chunk is embedded in parallel by ``process_chunk`` and the resulting rows
    are appended to any existing Parquet file at ``storage_path``.

    Args:
        url: HTTP(S) URL of the PDF to index.
        model: Embedding model name passed through to ``process_chunk``.
        client_type: Which API client to use ("openai" or "groq").
        n_words: Target words per chunk.
        overlap: Words of overlap between consecutive chunks.
        max_threads: Worker threads for parallel embedding.
        storage_path: Parquet file that accumulates rows across calls.
    """
    # Download the PDF; fail fast on HTTP errors rather than handing an
    # error page's bytes to fitz, which would die with a cryptic message.
    resp = requests.get(url)
    resp.raise_for_status()
    doc = fitz.open(stream=BytesIO(resp.content))

    # Build one task tuple per chunk — the exact shape process_chunk expects.
    # try/finally guarantees the document is closed even if chunking raises
    # (the original skipped doc.close() on error).
    try:
        tasks = []
        for page_index in range(len(doc)):
            page_text = doc[page_index].get_text()
            for chunk in split_text(page_text, n_words=n_words, overlap=overlap):
                tasks.append((chunk, page_index, model, client_type))
    finally:
        doc.close()

    # Context-manage the executor so worker threads are always joined;
    # the original created the pool inline and leaked it on exceptions.
    with ThreadPoolExecutor(max_threads) as executor:
        rows = list(executor.map(process_chunk, tasks))

    df_new = pd.DataFrame(rows)
    # NOTE(review): chunk_id restarts at 0 for every file, so ids repeat
    # across appended documents — confirm downstream lookups key on
    # (filename, chunk_id), not chunk_id alone.
    df_new['chunk_id'] = range(len(df_new))
    df_new['filename'] = os.path.basename(url)

    os.makedirs(os.path.dirname(storage_path), exist_ok=True)

    # Append to any existing store so earlier documents stay searchable.
    if os.path.exists(storage_path):
        df_old = pd.read_parquet(storage_path, engine="pyarrow")
        df = pd.concat([df_old, df_new], ignore_index=True)
    else:
        df = df_new

    df.to_parquet(storage_path, engine="pyarrow", index=False)
300305
301306def load_json_file (json_path ):
302307 if os .path .exists (json_path ):
@@ -307,4 +312,76 @@ def load_json_file(json_path):
def extract_code_block(code: str, language: str) -> str:
    """Return the body of the first ```<language> fenced block in *code*.

    Falls back to an empty string when no matching fence exists.
    """
    fence = re.search(rf"```{language}\n(.*?)```", code, re.DOTALL)
    if fence is None:
        return ""
    return fence.group(1)
316+
317+
def is_url_list(prompt):
    """Return True iff every comma-separated token in *prompt* is an absolute URL.

    A token counts as a URL only when urlparse finds both a scheme and a
    network location, so bare words and empty tokens fail the check.
    """
    for candidate in prompt.split(","):
        parsed = urllib.parse.urlparse(candidate.strip())
        if not (parsed.scheme and parsed.netloc):
            return False
    return True
325+
def add_images_descriptions(image_path):
    """Describe the image at *image_path* and log the result on disk.

    Appends an ``{"image_path", "image_description"}`` record to the JSON
    list at ``./.storage/.images_description.json`` (creating it on first
    use) and returns the generated description text.
    """
    description = generate_description(image_path)

    file_name = './.storage/.images_description.json'

    # Start from the existing log when present, otherwise a fresh list.
    data = []
    if os.path.exists(file_name):
        with open(file_name, 'r') as file:
            data = json.load(file)

    data.append({
        "image_path": image_path,
        "image_description": description,
    })

    # Rewrite the whole log; pretty-print so it stays human-inspectable.
    with open(file_name, 'w') as file:
        json.dump(data, file, indent=4)

    return description
348+
349+ replicate = Client (api_token = os .environ ['HAL9_TOKEN' ], base_url = "https://api.hal9.com/proxy/server=https://api.replicate.com" )
350+
def generate_description(image_path):
    """Stream a detailed visual description of *image_path* from LLaVA-13B.

    Feeds the image plus a fixed prompt to the Replicate-hosted model and
    concatenates the streamed tokens.

    Returns:
        The description with all '{' and '}' characters stripped, or a
        human-readable error string if anything fails (best-effort: callers
        get a message instead of an exception).
    """
    try:
        # `with` guarantees the file handle is closed even when the streaming
        # call raises — the original only closed it on the success path.
        with open(image_path, 'rb') as file_input:
            # Renamed from `input` to avoid shadowing the builtin.
            model_input = {
                "image": file_input,
                "prompt": """Generate a detailed image prompt that includes all specific visual details in the image. This should include precise descriptions of colors, textures, lighting, positions of all elements, proportions, background details,
        foreground details, and any unique stylistic choices. Ensure the description is exhaustive enough to allow an artist or AI to recreate the image accurately without visual reference."""
            }

            description = ""
            for event in replicate.stream(
                "yorickvp/llava-13b:80537f9eead1a5bfa72d5ac6ea6414379be41d4d4f6679fd776e9535d1eb58bb",
                input=model_input
            ):
                description += event.data
    except Exception as e:
        return (f"Couldn't describe that image. -> Error: {e}")

    # Strip braces so the text can be embedded in templates/f-strings safely.
    return description.replace("{", "").replace("}", "")
371+
def process_url(url, messages):
    """Download an uploaded file and prime the chat history to discuss it.

    Records an analytics event, downloads *url* into ./.storage, injects
    system/assistant messages pointing at the local copy, then indexes the
    content: embeddings for PDFs, generated descriptions for images.
    Returns the updated messages list.
    """
    h9.event("Uploaded File", f"{url}")

    filename = url.split("/")[-1]
    if "." in filename:
        file_extension = filename.split(".")[-1]
    else:
        file_extension = "No extension"

    download_file(url)
    messages = insert_message(messages, "system", f"Consider use the file available at path: './.storage/.{filename}' for the following questions.")
    messages = insert_message(messages, "assistant", f"I'm ready to answer questions about your file: {filename}")

    # Index by type so later questions can be answered from local artifacts.
    lowered = file_extension.lower()
    if lowered == "pdf":
        generate_text_embeddings_parquet(url)
    elif lowered in ('jpg', 'jpeg', 'png', 'webp'):
        add_images_descriptions(f"./.storage/.{filename}")

    print(f"I'm ready to answer questions about your file: {filename}")
    return messages
0 commit comments