Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.10
28 changes: 9 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@ This repository provides fast automatic speech recognition (70x realtime with la

<h2 align="left" id="setup">Setup ⚙️</h2>

### 0. CUDA Installation

To use WhisperX with GPU acceleration, install the CUDA toolkit 12.8 before WhisperX. Skip this step if using only the CPU.

- For **Linux** users, install the CUDA toolkit 12.8 following this guide:
[CUDA Installation Guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/).
- For **Windows** users, download and install the CUDA toolkit 12.8:
[CUDA Downloads](https://developer.nvidia.com/cuda-12-8-1-download-archive).

### 1. Simple Installation (Recommended)

The easiest way to install WhisperX is through PyPI:
Expand Down Expand Up @@ -102,25 +111,6 @@ uv sync --all-extras --dev

You may also need to install ffmpeg, Rust, etc. Follow OpenAI's setup instructions here: https://github.com/openai/whisper#setup.

### Common Issues & Troubleshooting 🔧

#### libcudnn Dependencies (GPU Users)

If you're using WhisperX with GPU support and encounter errors like:

- `Could not load library libcudnn_ops_infer.so.8`
- `Unable to load any of {libcudnn_cnn.so.9.1.0, libcudnn_cnn.so.9.1, libcudnn_cnn.so.9, libcudnn_cnn.so}`
- `libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory`

This means your system is missing the CUDA Deep Neural Network library (cuDNN). This library is needed for GPU acceleration but isn't always installed by default.

**Install cuDNN (example for apt based systems):**

```bash
sudo apt update
sudo apt install libcudnn8 libcudnn8-dev -y
```

### Speaker Diarization

To **enable Speaker Diarization**, include your Hugging Face access token (read) that you can generate from [here](https://huggingface.co/settings/tokens) after the `--hf_token` argument and accept the user agreement for the following models: [Segmentation](https://huggingface.co/pyannote/segmentation-3.0) and [Speaker-Diarization-3.1](https://huggingface.co/pyannote/speaker-diarization-3.1) (if you choose to use Speaker-Diarization 2.x, follow the requirements [here](https://huggingface.co/pyannote/speaker-diarization) instead.)
Expand Down
42 changes: 35 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,26 @@
urls = { repository = "https://github.com/m-bain/whisperx" }
authors = [{ name = "Max Bain" }]
name = "whisperx"
version = "3.4.3"
version = "3.5.0"
description = "Time-Accurate Automatic Speech Recognition using Whisper."
readme = "README.md"
requires-python = ">=3.9, <3.13"
license = { text = "BSD-2-Clause" }

dependencies = [
"ctranslate2<4.5.0",
"ctranslate2>=4.5.0",
"faster-whisper>=1.1.1",
"nltk>=3.9.1",
"numpy>=2.0.2",
"onnxruntime>=1.19",
"pandas>=2.2.3",
# Restrict numpy, onnxruntime, pandas, av to be compatible with Python 3.9
"numpy>=2.0.2,<2.1.0",
"onnxruntime>=1.19,<1.20.0",
"pandas>=2.2.3,<2.3.0",
"av<16.0.0",
"pyannote-audio>=3.3.2,<4.0.0",
"torch>=2.5.1",
"torchaudio>=2.5.1",
"torch>=2.7.1",
"torchaudio",
"transformers>=4.48.0",
"triton>=3.3.0; sys_platform == 'linux'" # only install triton on Linux
]


Expand All @@ -34,3 +37,28 @@ include-package-data = true
[tool.setuptools.packages.find]
where = ["."]
include = ["whisperx*"]

[tool.uv.sources]
torch = [
{ index = "pytorch-cpu", marker = "sys_platform == 'darwin'" },
{ index = "pytorch-cpu", marker = "platform_machine != 'x86_64' and sys_platform != 'darwin'" },
{ index = "pytorch", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
]
torchaudio = [
{ index = "pytorch-cpu", marker = "sys_platform == 'darwin'" },
{ index = "pytorch-cpu", marker = "platform_machine != 'x86_64' and sys_platform != 'darwin'" },
{ index = "pytorch", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" },
]
triton = [
{ index = "pytorch", marker = "sys_platform == 'linux'" },
]

[[tool.uv.index]]
name = "pytorch"
url = "https://download.pytorch.org/whl/cu128"
explicit = true

[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true
Loading