diff --git a/README.md b/README.md
index caf3bf8..94aa79a 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
[](https://github.com/geerlingguy/ollama-benchmark/actions/workflows/shellcheck.yaml)
+## Linux
+
This bash script benchmarks ollama on any system where it is installed.
For a quick installation, try:
@@ -26,7 +28,7 @@ Then run this benchmark script:
Uninstall Ollama following the [official uninstall instructions](https://github.com/ollama/ollama/blob/main/docs/linux.md#uninstall).
-## CLI Options
+### CLI Options
```
Usage: ./obench.sh [OPTIONS]
@@ -39,12 +41,32 @@ Options:
--markdown Format output as markdown
```
+## Windows
+
+This PowerShell script benchmarks ollama on any Windows system where it is installed.
+
+> Make sure you have [Ollama installed](https://ollama.com/download) on your Windows machine.
+
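+You can confirm Ollama is installed and on your `PATH` from a PowerShell prompt:
+
+```
+ollama --version
+```
+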
+### CLI Options
+
+```
+Usage: .\obench.ps1 [OPTIONS]
+Options:
+  -Help        Display this help message
+  -Default     Run a benchmark using some default small models
+  -Model       Specify a model to use
+  -Count       Number of times to run the benchmark
+  -OllamaBin   Point to ollama executable or command (e.g. if using Docker)
+  -Markdown    Format output as markdown
+```
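+
+For example, to benchmark a specific model three times and print the results as a markdown table:
+
+```
+.\obench.ps1 -Model "llama3.2:3b" -Count 3 -Markdown
+```
+
+If Ollama runs inside a container, point `-OllamaBin` at the full wrapper command (this example assumes a Docker container named `ollama`):
+
+```
+.\obench.ps1 -Default -OllamaBin "docker exec -i ollama ollama"
+```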
+
## Findings
### DeepSeek
| System | CPU/GPU | Model | Eval Rate | Power (Peak) |
-| :--- | :--- | :--- | :--- | :--- |
+| :--- | :--- | :--- | ---: | :--- |
| Pi 5 - 16GB | CPU | deepseek-r1:14b | 1.20 Tokens/s | 13.0 W |
| Pi 5 - 16GB (AMD Pro W7700<sup>1</sup>) | GPU | deepseek-r1:14b | 19.90 Tokens/s | 164 W |
| GMKtek G3 Plus (Intel N150) - 16GB | CPU | deepseek-r1:1.5b | 17.02 Tokens/s | 25.6 W |
@@ -53,11 +75,15 @@ Options:
| AmpereOne A192-32X - 512GB | CPU | deepseek-r1:671b | 4.18 Tokens/s | 477 W |
| M1 Ultra (48 GPU Core) 64GB | GPU | deepseek-r1:1.5b | 126.21 Tokens/s | N/A |
| M1 Ultra (48 GPU Core) 64GB | GPU | deepseek-r1:14b | 35.89 Tokens/s | N/A |
+| M1 MacBook Air (8 GPU Core) 8GB | GPU | deepseek-r1:8b | 9.09 Tokens/s | N/A |
+| Intel i5-13500 (AMD 7800 XT) | GPU | deepseek-r1:8b | 67.99 Tokens/s | N/A |
+| Intel i5-13500 (AMD 7800 XT) | GPU | deepseek-r1:14b | 37.53 Tokens/s | N/A |
+
### Llama
| System | CPU/GPU | Model | Eval Rate | Power (Peak) |
-| :--- | :--- | :--- | :--- | :--- |
+| :--- | :--- | :--- | ---: | :--- |
| Pi 400 - 4GB | CPU | llama3.2:3b | 1.60 Tokens/s | 6 W |
| Pi 5 - 8GB | CPU | llama3.2:3b | 4.61 Tokens/s | 13.9 W |
| Pi 5 - 8GB | CPU | llama3.1:8b | 1.99 Tokens/s | 13.2 W |
@@ -80,6 +106,9 @@ Options:
| Pi 5 - 8GB (AMD Pro W7700<sup>1</sup>) | GPU | llama3.2:3b | 56.14 Tokens/s | 145 W |
| Pi 5 - 8GB (AMD Pro W7700<sup>1</sup>) | GPU | llama3.1:8b | 39.87 Tokens/s | 52 W |
| Pi 5 - 8GB (AMD Pro W7700<sup>1</sup>) | GPU | llama2:13b | 4.38 Tokens/s | 108 W |
+| M1 MacBook Air (8 GB) | GPU | llama3.2:3b | 22.95 Tokens/s | N/A |
+| M1 MacBook Air (8 GB) | GPU | llama3.1:8b | 9.18 Tokens/s | N/A |
+| M1 MacBook Air (8 GB) | GPU | llama2:7b | 14.12 Tokens/s | N/A |
| M4 Mac mini (10 core - 32GB) | GPU | llama3.2:3b | 41.31 Tokens/s | 30.1 W |
| M4 Mac mini (10 core - 32GB) | GPU | llama3.1:8b | 20.95 Tokens/s | 29.4 W |
| M4 Mac mini (10 core - 32GB) | GPU | llama2:13b | 13.60 Tokens/s | 29.8 W |
@@ -89,6 +118,9 @@ Options:
| M1 Max Mac Studio (10 core - 64GB) | GPU | llama3.1:70b | 7.25 Tokens/s | N/A |
| M1 Ultra (48 GPU Core) 64GB | GPU | llama3.2:3b | 108.67 Tokens/s | N/A |
| M1 Ultra (48 GPU Core) 64GB | GPU | llama3.1:8b | 62.28 Tokens/s | N/A |
+| Intel i5-13500 (AMD 7800 XT) | GPU | llama3.2:3b | 122.38 Tokens/s | N/A |
+| Intel i5-13500 (AMD 7800 XT) | GPU | llama3.1:8b | 37.83 Tokens/s | N/A |
+| Intel i5-13500 (AMD 7800 XT) | GPU | llama2:7b | 62.51 Tokens/s | N/A |
| Ryzen 9 7900X (Nvidia 4090) | GPU | llama3.2:3b | 237.05 Tokens/s | N/A |
| Ryzen 9 7900X (Nvidia 4090) | GPU | llama3.1:8b | 148.09 Tokens/s | N/A |
| Ryzen 9 7900X (Nvidia 4090) | GPU/CPU | llama3.1:70b | 3.10 Tokens/s | N/A |
@@ -114,6 +146,20 @@ Options:
<sup>1</sup> These GPUs were tested using `llama.cpp` with Vulkan support.
+### Qwen
+
+| System | CPU/GPU | Model | Eval Rate | Power (Peak) |
+| :--- | :--- | :--- | ---: | :--- |
+| Intel i5-13500 (AMD 7800 XT) | GPU | qwen:14b | 3.56 Tokens/s | N/A |
+| Intel i5-13500 (AMD 7800 XT) | GPU | qwen2.5:14b | 4.01 Tokens/s | N/A |
+
+### Phi
+
+| System | CPU/GPU | Model | Eval Rate | Power (Peak) |
+| :--- | :--- | :--- | ---: | :--- |
+| Intel i5-13500 (AMD 7800 XT) | GPU | phi:14b | 41.33 Tokens/s | N/A |
+
## Further Reading
This script is just a quick way of comparing _one aspect_ of generative AI performance. There are _many other_ aspects that are as important (or more important) which this script does _not_ cover.
diff --git a/obench.ps1 b/obench.ps1
new file mode 100644
index 0000000..1b21dd3
--- /dev/null
+++ b/obench.ps1
@@ -0,0 +1,83 @@
+# PowerShell script to benchmark Ollama token generation rate
+# Inspired by https://taoofmac.com/space/blog/2024/01/20/1800
+
+param (
+ [switch]$Help,
+ [switch]$Default,
+ [string]$Model,
+ [int]$Count,
+ [string]$OllamaBin = "ollama",
+ [switch]$Markdown
+)
+
+function Show-Usage {
+    Write-Output "Usage: .\obench.ps1 [OPTIONS]"
+    Write-Output "Options:"
+    Write-Output "  -Help        Display this help message"
+    Write-Output "  -Default     Run a benchmark using some default small models"
+    Write-Output "  -Model       Specify a model to use"
+    Write-Output "  -Count       Number of times to run the benchmark"
+    Write-Output "  -OllamaBin   Point to ollama executable or command (e.g. if using Docker)"
+    Write-Output "  -Markdown    Format output as markdown"
+ exit 0
+}
+
+if ($Help) {
+    Show-Usage
+}
+
+# Default values
+if ($Default) {
+ $Count = 3
+ $Model = "llama3.2:3b"
+}
+
+# Ensure Ollama is available. -OllamaBin may be a multi-word command
+# (e.g. "docker exec -i ollama ollama"), so split it into executable and arguments.
+$ollamaParts = $OllamaBin -split " "
+$baseCmd = $ollamaParts[0]
+$ollamaArgs = @($ollamaParts | Select-Object -Skip 1)
+if (-not (Get-Command $baseCmd -ErrorAction SilentlyContinue)) {
+    Write-Error "Error: $baseCmd could not be found. Please check the path or install it."
+    exit 1
+}
+
+# Prompt for benchmark count if not provided
+if (-not $Count) {
+ $Count = Read-Host "How many times to run the benchmark?"
+}
+
+# Prompt for model if not provided
+if (-not $Model) {
+ Write-Output "Current models available locally:"
+    & $baseCmd @ollamaArgs list
+ $Model = Read-Host "Enter model you'd like to run (e.g. llama3.2)"
+}
+
+Write-Output "Running benchmark $Count times using model: $Model"
+Write-Output ""
+if ($Markdown) {
+ Write-Output "| Run | Eval Rate (Tokens/Second) |"
+ Write-Output "|-----|---------------------------|"
+}
+
+$totalEvalRate = 0
+for ($run = 1; $run -le $Count; $run++) {
+    # Ollama prints its timing stats (including "eval rate") on stderr, so merge the streams before matching
+    $result = "Why is the blue sky blue?" | & $baseCmd @ollamaArgs run $Model --verbose 2>&1 | Select-String "^eval rate:"
+
+    # Pull the numeric value out of a line like "eval rate:    12.34 tokens/s"
+    if ($result -and ([string]$result -match 'eval rate:\s+([\d.]+)')) {
+        $tokenValue = [double]$Matches[1]
+        $totalEvalRate += $tokenValue
+        if ($Markdown) {
+            Write-Output "| $run | $tokenValue tokens/s |"
+        } else {
+            Write-Output $result
+        }
+    }
+}
+
+$averageEvalRate = [math]::Round($totalEvalRate / $Count, 2)
+if ($Markdown) {
+    Write-Output "| **Average Eval Rate** | $averageEvalRate tokens/s |"
+} else {
+ Write-Output "Average Eval Rate: $averageEvalRate tokens/second"
+}