From 0c85287f1fe6005e77df3e44281570432aa506b8 Mon Sep 17 00:00:00 2001 From: Pradeep Date: Sun, 2 Nov 2025 17:17:26 +0100 Subject: [PATCH 1/3] feat: add prototype speech-to-text support for AI agent input (experimental) --- src/webui/components/browser_use_agent_tab.py | 138 ++++++++++++++++-- 1 file changed, 128 insertions(+), 10 deletions(-) diff --git a/src/webui/components/browser_use_agent_tab.py b/src/webui/components/browser_use_agent_tab.py index b51a1663..69b922b4 100644 --- a/src/webui/components/browser_use_agent_tab.py +++ b/src/webui/components/browser_use_agent_tab.py @@ -969,13 +969,97 @@ async def handle_clear(webui_manager: WebuiManager): # --- Tab Creation Function --- +# (Make sure all your imports from before are still at the top of the file) +# (e.g., import gradio as gr, from src.webui.webui_manager import WebuiManager, etc.) +# (DO NOT import numpy or transformers) + + +# ... (all your helper functions like _initialize_llm, _handle_new_step, etc. go here) ... + + +# --- Tab Creation Function --- + def create_browser_use_agent_tab(webui_manager: WebuiManager): """ Create the run agent tab, defining UI, state, and handlers. """ webui_manager.init_browser_use_agent() - # --- Define UI Components --- + # --- 1. NEW: Define the JavaScript for Browser Speech-to-Text --- + # This JS function will be attached to our new button. + # It finds the button and textbox by their `elem_id`s. + js_speech_function = """ + () => { + // --- THIS IS THE UPDATED PART --- + // We will try multiple ways to find the elements, just in case + // Gradio has rendered them differently. + + // Try to find the button: + // 1. A