From c8f759734537d87193be62b21d32015c12c69cae Mon Sep 17 00:00:00 2001 From: Barabazs <31799121+Barabazs@users.noreply.github.com> Date: Fri, 17 Oct 2025 15:20:04 +0000 Subject: [PATCH 1/2] feat: add hotwords argument to CLI for improved recognition of rare terms --- whisperx/__main__.py | 1 + whisperx/transcribe.py | 1 + 2 files changed, 2 insertions(+) diff --git a/whisperx/__main__.py b/whisperx/__main__.py index 5102bc0a..e7cc62f0 100644 --- a/whisperx/__main__.py +++ b/whisperx/__main__.py @@ -58,6 +58,7 @@ def cli(): parser.add_argument("--suppress_numerals", action="store_true", help="whether to suppress numeric symbols and currency symbols during sampling, since wav2vec2 cannot align them correctly") parser.add_argument("--initial_prompt", type=str, default=None, help="optional text to provide as a prompt for the first window.") + parser.add_argument("--hotwords", type=str, default=None, help="hotwords/hint phrases to the model (e.g. \"WhisperX, PyAnnote, GPU\"); improves recognition of rare/technical terms") parser.add_argument("--condition_on_previous_text", type=str2bool, default=False, help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop") parser.add_argument("--fp16", type=str2bool, default=True, help="whether to perform inference in fp16; True by default") diff --git a/whisperx/transcribe.py b/whisperx/transcribe.py index 11110c64..04c2ab36 100644 --- a/whisperx/transcribe.py +++ b/whisperx/transcribe.py @@ -106,6 +106,7 @@ def transcribe_task(args: dict, parser: argparse.ArgumentParser): "no_speech_threshold": args.pop("no_speech_threshold"), "condition_on_previous_text": False, "initial_prompt": args.pop("initial_prompt"), + "hotwords": args.pop("hotwords"), "suppress_tokens": [int(x) for x in args.pop("suppress_tokens").split(",")], "suppress_numerals": args.pop("suppress_numerals"), } From 
6e1d1caaf4963a4b2f0c6999a0c54486b105e038 Mon Sep 17 00:00:00 2001 From: JulianFP Date: Tue, 13 May 2025 02:45:33 +0200 Subject: [PATCH 2/2] fix: incorrect type annotation in get_writer return value The audio_path parameter that the __call__ method of the ResultWriter class takes is a str, not TextIO --- whisperx/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/whisperx/utils.py b/whisperx/utils.py index dfe3cf2e..ada0deb9 100644 --- a/whisperx/utils.py +++ b/whisperx/utils.py @@ -410,7 +410,7 @@ def write_result(self, result: dict, file: TextIO, options: dict): def get_writer( output_format: str, output_dir: str -) -> Callable[[dict, TextIO, dict], None]: +) -> Callable[[dict, str, dict], None]: writers = { "txt": WriteTXT, "vtt": WriteVTT, @@ -425,7 +425,7 @@ def get_writer( if output_format == "all": all_writers = [writer(output_dir) for writer in writers.values()] - def write_all(result: dict, file: TextIO, options: dict): + def write_all(result: dict, file: str, options: dict): for writer in all_writers: writer(result, file, options)