From c8f759734537d87193be62b21d32015c12c69cae Mon Sep 17 00:00:00 2001
From: Barabazs <31799121+Barabazs@users.noreply.github.com>
Date: Fri, 17 Oct 2025 15:20:04 +0000
Subject: [PATCH 1/2] feat: add hotwords argument to CLI for improved
 recognition of rare terms

---
 whisperx/__main__.py   | 1 +
 whisperx/transcribe.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/whisperx/__main__.py b/whisperx/__main__.py
index 5102bc0a..e7cc62f0 100644
--- a/whisperx/__main__.py
+++ b/whisperx/__main__.py
@@ -58,6 +58,7 @@ def cli():
     parser.add_argument("--suppress_numerals", action="store_true", help="whether to suppress numeric symbols and currency symbols during sampling, since wav2vec2 cannot align them correctly")
 
     parser.add_argument("--initial_prompt", type=str, default=None, help="optional text to provide as a prompt for the first window.")
+    parser.add_argument("--hotwords", type=str, default=None, help="hotwords/hint phrases to the model (e.g. \"WhisperX, PyAnnote, GPU\"); improves recognition of rare/technical terms")
     parser.add_argument("--condition_on_previous_text", type=str2bool, default=False, help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop")
     parser.add_argument("--fp16", type=str2bool, default=True, help="whether to perform inference in fp16; True by default")
 
diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py
index 11110c64..04c2ab36 100644
--- a/whisperx/transcribe.py
+++ b/whisperx/transcribe.py
@@ -106,6 +106,7 @@ def transcribe_task(args: dict, parser: argparse.ArgumentParser):
         "no_speech_threshold": args.pop("no_speech_threshold"),
         "condition_on_previous_text": False,
         "initial_prompt": args.pop("initial_prompt"),
+        "hotwords": args.pop("hotwords"),
         "suppress_tokens": [int(x) for x in args.pop("suppress_tokens").split(",")],
         "suppress_numerals": args.pop("suppress_numerals"),
     }

From 6e1d1caaf4963a4b2f0c6999a0c54486b105e038 Mon Sep 17 00:00:00 2001
From: JulianFP <julian@partanengroup.de>
Date: Tue, 13 May 2025 02:45:33 +0200
Subject: [PATCH 2/2] fix: incorrect type annotation in get_writer return value
 The audio_path parameter that the __call__ method of the ResultWriter class
 takes is a str, not TextIO

---
 whisperx/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/whisperx/utils.py b/whisperx/utils.py
index dfe3cf2e..ada0deb9 100644
--- a/whisperx/utils.py
+++ b/whisperx/utils.py
@@ -410,7 +410,7 @@ def write_result(self, result: dict, file: TextIO, options: dict):
 
 def get_writer(
     output_format: str, output_dir: str
-) -> Callable[[dict, TextIO, dict], None]:
+) -> Callable[[dict, str, dict], None]:
     writers = {
         "txt": WriteTXT,
         "vtt": WriteVTT,
@@ -425,7 +425,7 @@ def get_writer(
     if output_format == "all":
         all_writers = [writer(output_dir) for writer in writers.values()]
 
-        def write_all(result: dict, file: TextIO, options: dict):
+        def write_all(result: dict, file: str, options: dict):
             for writer in all_writers:
                 writer(result, file, options)