README.md: 52 changes (49 additions, 3 deletions)

[![.github/workflows/shellcheck.yaml](https://github.com/geerlingguy/ollama-benchmark/actions/workflows/shellcheck.yaml/badge.svg)](https://github.com/geerlingguy/ollama-benchmark/actions/workflows/shellcheck.yaml)

## Linux

This Bash script benchmarks Ollama on any Linux system where it is installed.

For a quick installation, try the official Ollama install script:
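
```
curl -fsSL https://ollama.com/install.sh | sh
```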
Then run this benchmark script:
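
```
./obench.sh
```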

Uninstall Ollama following the [official uninstall instructions](https://github.com/ollama/ollama/blob/main/docs/linux.md#uninstall).

### CLI Options

```
Usage: ./obench.sh [OPTIONS]
Options:
  -h, --help        Display this help message
  -d, --default     Run a benchmark using some default small models
  -m, --model       Specify a model to use
  -c, --count       Number of times to run the benchmark
  -b, --ollama-bin  Point to ollama executable or command (e.g. if using Docker)
  --markdown        Format output as markdown
```
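
For example (flag names assumed to mirror the PowerShell port below), a three-run markdown benchmark of a specific model might look like:

```
./obench.sh --model llama3.2:3b --count 3 --markdown
```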

## Windows

This PowerShell script benchmarks Ollama on any Windows system where it is installed.

> Make sure you have Ollama installed on your Windows machine.

### CLI Options

```
Usage: .\obench.ps1 [OPTIONS]
Options:
  -Help        Display this help message
  -Default     Run a benchmark using some default small models
  -Model       Specify a model to use
  -Count       Number of times to run the benchmark
  -OllamaBin   Point to ollama executable or command (e.g. if using Docker)
  -Markdown    Format output as markdown
```
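
For example, to benchmark llama3.2:3b three times and print a markdown table:

```
.\obench.ps1 -Model llama3.2:3b -Count 3 -Markdown
```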


## Findings

### DeepSeek

| System | CPU/GPU | Model | Eval Rate | Power (Peak) |
| :--- | :--- | :--- | ---: | :--- |
| Pi 5 - 16GB | CPU | deepseek-r1:14b | 1.20 Tokens/s | 13.0 W |
| Pi 5 - 16GB (AMD Pro W7700<sup>1</sup>) | GPU | deepseek-r1:14b | 19.90 Tokens/s | 164 W |
| GMKtek G3 Plus (Intel N150) - 16GB | CPU | deepseek-r1:1.5b | 17.02 Tokens/s | 25.6 W |
| AmpereOne A192-32X - 512GB | CPU | deepseek-r1:671b | 4.18 Tokens/s | 477 W |
| M1 Ultra (48 GPU Core) 64GB | GPU | deepseek-r1:1.5b | 126.21 Tokens/s | N/A |
| M1 Ultra (48 GPU Core) 64GB | GPU | deepseek-r1:14b | 35.89 Tokens/s | N/A |
| M1 MacBook Air (8 GPU Core) 8GB | GPU | deepseek-r1:8b | 9.09 Tokens/s | N/A |
| Intel i5 13500 (AMD 7800XT) | GPU | deepseek-r1:8b | 67.99 Tokens/s | N/A |
| Intel i5 13500 (AMD 7800XT) | GPU | deepseek-r1:14b | 37.53 Tokens/s | N/A |


### Llama

| System | CPU/GPU | Model | Eval Rate | Power (Peak) |
| :--- | :--- | :--- | ---: | :--- |
| Pi 400 - 4GB | CPU | llama3.2:3b | 1.60 Tokens/s | 6 W |
| Pi 5 - 8GB | CPU | llama3.2:3b | 4.61 Tokens/s | 13.9 W |
| Pi 5 - 8GB | CPU | llama3.1:8b | 1.99 Tokens/s | 13.2 W |
| Pi 5 - 8GB (AMD Pro W7700<sup>1</sup>) | GPU | llama3.2:3b | 56.14 Tokens/s | 145 W |
| Pi 5 - 8GB (AMD Pro W7700<sup>1</sup>) | GPU | llama3.1:8b | 39.87 Tokens/s | 52 W |
| Pi 5 - 8GB (AMD Pro W7700<sup>1</sup>) | GPU | llama2:13b | 4.38 Tokens/s | 108 W |
| M1 MacBook Air (8 GB) | GPU | llama3.2:8b | 22.95 Tokens/s | N/A |
| M1 MacBook Air (8 GB) | GPU | llama3.1:8b | 9.18 Tokens/s | N/A |
| M1 MacBook Air (8 GB) | GPU | llama2:7b | 14.12 Tokens/s | N/A |
| M4 Mac mini (10 core - 32GB) | GPU | llama3.2:3b | 41.31 Tokens/s | 30.1 W |
| M4 Mac mini (10 core - 32GB) | GPU | llama3.1:8b | 20.95 Tokens/s | 29.4 W |
| M4 Mac mini (10 core - 32GB) | GPU | llama2:13b | 13.60 Tokens/s | 29.8 W |
| M1 Max Mac Studio (10 core - 64GB) | GPU | llama3.1:70b | 7.25 Tokens/s | N/A |
| M1 Ultra (48 GPU Core) 64GB | GPU | llama3.2:3b | 108.67 Tokens/s | N/A |
| M1 Ultra (48 GPU Core) 64GB | GPU | llama3.1:8b | 62.28 Tokens/s | N/A |
| Intel i5 13500 (AMD 7800XT) | GPU | llama3.1:8b | 37.83 Tokens/s | N/A |
| Intel i5 13500 (AMD 7800XT) | GPU | llama3.2:8b | 122.38 Tokens/s | N/A |
| Intel i5 13500 (AMD 7800XT) | GPU | llama2:7b | 62.51 Tokens/s | N/A |
| Ryzen 9 7900X (Nvidia 4090) | GPU | llama3.2:3b | 237.05 Tokens/s | N/A |
| Ryzen 9 7900X (Nvidia 4090) | GPU | llama3.1:8b | 148.09 Tokens/s | N/A |
| Ryzen 9 7900X (Nvidia 4090) | GPU/CPU | llama3.1:70b | 3.10 Tokens/s | N/A |

<sup>1</sup> These GPUs were tested using `llama.cpp` with Vulkan support.

### Qwen

| System | CPU/GPU | Model | Eval Rate | Power (Peak) |
| :--- | :--- | :--- | ---: | :--- |
| Intel i5 13500 (AMD 7800XT) | GPU | qwen:14b | 3.56 Tokens/s | N/A |
| Intel i5 13500 (AMD 7800XT) | GPU | qwen2.5:14b | 4.01 Tokens/s | N/A |

### Phi

| System | CPU/GPU | Model | Eval Rate | Power (Peak) |
| :--- | :--- | :--- | ---: | :--- |
| Intel i5 13500 (AMD 7800XT) | GPU | phi:14b | 41.33 Tokens/s | N/A |


## Further Reading

This script is just a quick way of comparing _one aspect_ of generative AI performance. There are _many other_ aspects that are as important (or more important) that this script does _not_ cover.
obench.ps1: 83 additions (new file)
# PowerShell script to benchmark Ollama token generation rate
# Inspired by https://taoofmac.com/space/blog/2024/01/20/1800
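#
# Example of a Docker-backed invocation (container name assumed):
#   .\obench.ps1 -OllamaBin "docker exec ollama ollama" -Model llama3.2:3b -Count 3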

param (
    [switch]$Help,
    [switch]$Default,
    [string]$Model,
    [int]$Count,
    [string]$OllamaBin = "ollama",
    [switch]$Markdown
)

function Show-Usage {
    Write-Output "Usage: .\obench.ps1 [OPTIONS]"
    Write-Output "Options:"
    Write-Output "  -Help        Display this help message"
    Write-Output "  -Default     Run a benchmark using some default small models"
    Write-Output "  -Model       Specify a model to use"
    Write-Output "  -Count       Number of times to run the benchmark"
    Write-Output "  -OllamaBin   Point to ollama executable or command (e.g. if using Docker)"
    Write-Output "  -Markdown    Format output as markdown"
    exit 0
}

if ($Help) {
    Show-Usage
}

# Default values
if ($Default) {
    $Count = 3
    $Model = "llama3.2:3b"
}

# Ensure Ollama is available
# Support multi-word commands (e.g. "docker exec ollama ollama") by splitting
# the executable from its leading arguments.
$ollamaParts = $OllamaBin -split " "
$baseCmd = $ollamaParts[0]
$ollamaArgs = @($ollamaParts | Select-Object -Skip 1)
if (-not (Get-Command $baseCmd -ErrorAction SilentlyContinue)) {
    Write-Error "Error: $baseCmd could not be found. Please check the path or install it."
    exit 1
}

# Prompt for benchmark count if not provided
if (-not $Count) {
    $Count = Read-Host "How many times to run the benchmark?"
}

# Prompt for model if not provided
if (-not $Model) {
    Write-Output "Current models available locally:"
    & $baseCmd @ollamaArgs list
    $Model = Read-Host "Enter model you'd like to run (e.g. llama3.2)"
}

Write-Output "Running benchmark $Count times using model: $Model"
Write-Output ""
if ($Markdown) {
    Write-Output "| Run | Eval Rate (Tokens/Second) |"
    Write-Output "|-----|---------------------------|"
}

$totalEvalRate = 0
for ($run = 1; $run -le $Count; $run++) {
    # Pipe a fixed prompt into `ollama run --verbose` and keep the eval rate line.
    $result = "Why is the blue sky blue?" | & $baseCmd @ollamaArgs run $Model --verbose 2>&1 | Select-String "^eval rate:"

    if ($result) {
        # The matched line looks like "eval rate:    21.58 tokens/s"; splitting on
        # runs of whitespace puts the numeric rate at index 2 ("eval", "rate:", "21.58", ...).
        $evalRate = ($result.Line -split "\s+")[2]
        $totalEvalRate += [double]$evalRate
        if ($Markdown) {
            Write-Output "| $run | $evalRate tokens/s |"
        } else {
            Write-Output $result.Line
        }
    }
}

$averageEvalRate = [math]::Round($totalEvalRate / $Count, 2)
if ($Markdown) {
    Write-Output "| **Average Eval Rate** | $averageEvalRate tokens/s |"
} else {
    Write-Output "Average Eval Rate: $averageEvalRate tokens/second"
}