diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml new file mode 100644 index 0000000..cae6a1f --- /dev/null +++ b/.github/workflows/bench.yml @@ -0,0 +1,56 @@ +name: Benchmarks + +on: + # Run on pushes to main (to keep report up to date) + push: + branches: [main] + # Run weekly on Mondays at 06:00 UTC + schedule: + - cron: "0 6 * * 1" + # Allow manual trigger + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + +jobs: + bench: + name: Run Benchmarks + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Run benchmarks and generate report + run: bun run bench:report + + - name: Upload JSON results + uses: actions/upload-artifact@v4 + with: + name: bench-results + path: reports/bench-results.json + retention-days: 90 + + - name: Commit updated report + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + git add reports/benchmarks.md + + if git diff --staged --quiet; then + echo "No changes to benchmark report" + else + git commit -m "docs: update benchmark report" + git push + fi diff --git a/.gitignore b/.gitignore index 0d003c2..fd26d33 100644 --- a/.gitignore +++ b/.gitignore @@ -52,5 +52,8 @@ debug/ fixtures/benchmarks/ fixtures/private/ +# Benchmark JSON results (machine-specific) +reports/bench-results.json + # Temporary files tmp/ diff --git a/benchmarks/comparison.bench.ts b/benchmarks/comparison.bench.ts index 255a57d..355c33d 100644 --- a/benchmarks/comparison.bench.ts +++ b/benchmarks/comparison.bench.ts @@ -9,10 +9,12 @@ import { PDFDocument } from "pdf-lib"; import { bench, describe } from "vitest"; import { PDF } from "../src"; -import { loadFixture, getHeavyPdf } from "./fixtures"; +import { getHeavyPdf, getSynthetic100, getSynthetic2000, loadFixture } from "./fixtures"; -// Pre-load fixture +// Pre-load fixtures const pdfBytes = await getHeavyPdf(); +const synthetic100 = await getSynthetic100(); +const synthetic2000 = await getSynthetic2000(); describe("Load PDF", () => { bench("libpdf", async () => { @@ -119,3 +121,145 @@ describe("Load, modify, and save PDF", () => { await pdf.save(); }); }); + +// ───────────────────────────────────────────────────────────────────────────── +// Page splitting comparison (issue #26) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Extract single page from 100-page PDF", () => { + bench("libpdf", async () => { + const pdf = await PDF.load(synthetic100); + const extracted = await pdf.extractPages([0]); + await extracted.save(); + }); + + bench("pdf-lib", async () => { + const pdf = await PDFDocument.load(synthetic100); + const newDoc = await PDFDocument.create(); + const [page] = await newDoc.copyPages(pdf, [0]); + newDoc.addPage(page); + await newDoc.save(); + }); +}); + +describe("Split 100-page PDF into single-page PDFs", () => { + bench( + "libpdf", + async () => { + const pdf = await PDF.load(synthetic100); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const single = await pdf.extractPages([i]); + await single.save(); + } + }, + { warmupIterations: 1, iterations: 3 }, + ); + + bench( + "pdf-lib", + async () => { + const pdf = await PDFDocument.load(synthetic100); + const pageCount = pdf.getPageCount(); + + 
for (let i = 0; i < pageCount; i++) { + const newDoc = await PDFDocument.create(); + const [page] = await newDoc.copyPages(pdf, [i]); + newDoc.addPage(page); + await newDoc.save(); + } + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); + +describe(`Split 2000-page PDF into single-page PDFs (${(synthetic2000.length / 1024 / 1024).toFixed(1)}MB)`, () => { + bench( + "libpdf", + async () => { + const pdf = await PDF.load(synthetic2000); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const single = await pdf.extractPages([i]); + await single.save(); + } + }, + { warmupIterations: 0, iterations: 1, time: 0 }, + ); + + bench( + "pdf-lib", + async () => { + const pdf = await PDFDocument.load(synthetic2000); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const newDoc = await PDFDocument.create(); + const [page] = await newDoc.copyPages(pdf, [i]); + newDoc.addPage(page); + await newDoc.save(); + } + }, + { warmupIterations: 0, iterations: 1, time: 0 }, + ); +}); + +describe("Copy 10 pages between documents", () => { + bench("libpdf", async () => { + const source = await PDF.load(synthetic100); + const dest = PDF.create(); + const indices = Array.from({ length: 10 }, (_, i) => i); + await dest.copyPagesFrom(source, indices); + await dest.save(); + }); + + bench("pdf-lib", async () => { + const source = await PDFDocument.load(synthetic100); + const dest = await PDFDocument.create(); + const indices = Array.from({ length: 10 }, (_, i) => i); + const pages = await dest.copyPages(source, indices); + + for (const page of pages) { + dest.addPage(page); + } + + await dest.save(); + }); +}); + +describe("Merge 2 x 100-page PDFs", () => { + bench( + "libpdf", + async () => { + const merged = await PDF.merge([synthetic100, synthetic100]); + await merged.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); + + bench( + "pdf-lib", + async () => { + const doc1 = await PDFDocument.load(synthetic100); + const doc2 = await PDFDocument.load(synthetic100); + const merged = await PDFDocument.create(); + + const pages1 = await merged.copyPages(doc1, doc1.getPageIndices()); + + for (const page of pages1) { + merged.addPage(page); + } + + const pages2 = await merged.copyPages(doc2, doc2.getPageIndices()); + + for (const page of pages2) { + merged.addPage(page); + } + + await merged.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); diff --git a/benchmarks/copying.bench.ts b/benchmarks/copying.bench.ts new file mode 100644 index 0000000..034b6c2 --- /dev/null +++ b/benchmarks/copying.bench.ts @@ -0,0 +1,94 @@ +/** + * PDF page-copying and merging benchmarks. + * + * Tests the performance of copying pages between documents and merging + * multiple PDFs. These operations are closely related to splitting + * (issue #26) and represent the other side of the workflow. 
+ */ + +import { bench, describe } from "vitest"; + +import { PDF } from "../src"; +import { getSynthetic100, loadFixture, mediumPdfPath } from "./fixtures"; + +// Pre-load fixtures +const mediumPdf = await loadFixture(mediumPdfPath); +const synthetic100 = await getSynthetic100(); + +// ───────────────────────────────────────────────────────────────────────────── +// Page copying +// ───────────────────────────────────────────────────────────────────────────── + +describe("Copy pages between documents", () => { + bench("copy 1 page", async () => { + const source = await PDF.load(mediumPdf); + const dest = PDF.create(); + await dest.copyPagesFrom(source, [0]); + await dest.save(); + }); + + bench("copy 10 pages from 100-page PDF", async () => { + const source = await PDF.load(synthetic100); + const dest = PDF.create(); + const indices = Array.from({ length: 10 }, (_, i) => i); + await dest.copyPagesFrom(source, indices); + await dest.save(); + }); + + bench( + "copy all 100 pages", + async () => { + const source = await PDF.load(synthetic100); + const dest = PDF.create(); + const indices = Array.from({ length: 100 }, (_, i) => i); + await dest.copyPagesFrom(source, indices); + await dest.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Self-copy (page duplication) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Duplicate pages within same document", () => { + bench("duplicate page 0", async () => { + const pdf = await PDF.load(mediumPdf); + await pdf.copyPagesFrom(pdf, [0]); + await pdf.save(); + }); + + bench("duplicate all pages (double the document)", async () => { + const pdf = await PDF.load(mediumPdf); + const indices = Array.from({ length: pdf.getPageCount() }, (_, i) => i); + await pdf.copyPagesFrom(pdf, indices); + await pdf.save(); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Merging +// ───────────────────────────────────────────────────────────────────────────── + +describe("Merge PDFs", () => { + bench("merge 2 small PDFs", async () => { + const merged = await PDF.merge([mediumPdf, mediumPdf]); + await merged.save(); + }); + + bench("merge 10 small PDFs", async () => { + const sources = Array.from({ length: 10 }, () => mediumPdf); + const merged = await PDF.merge(sources); + await merged.save(); + }); + + bench( + "merge 2 x 100-page PDFs", + async () => { + const merged = await PDF.merge([synthetic100, synthetic100]); + await merged.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); diff --git a/benchmarks/fixtures.ts b/benchmarks/fixtures.ts index b18e21c..4cf11f2 100644 --- a/benchmarks/fixtures.ts +++ b/benchmarks/fixtures.ts @@ -1,12 +1,16 @@ /** * Benchmark fixture helpers. * - * Provides utilities for loading PDF fixtures for benchmarks. + * Provides utilities for loading PDF fixtures for benchmarks, + * including synthetic large PDFs built by copying pages from + * existing fixtures. 
*/ import { existsSync, mkdirSync, writeFileSync } from "node:fs"; import { readFile } from "node:fs/promises"; +import { PDF } from "../src"; + // Heavy PDF - downloaded on first run (~10MB) const HEAVY_PDF_PATH = "fixtures/benchmarks/cc-journalists-guide.pdf"; const HEAVY_PDF_URL = @@ -15,6 +19,11 @@ const HEAVY_PDF_URL = // Fallback large PDF - use existing fixture from pdfbox malformed tests (2MB) const LARGE_PDF_FALLBACK = "fixtures/malformed/pdfbox/PDFBOX-3947.pdf"; +// Synthetic PDFs - generated on first run, cached locally +const SYNTHETIC_DIR = "fixtures/benchmarks"; +const SYNTHETIC_100_PATH = `${SYNTHETIC_DIR}/synthetic-100p.pdf`; +const SYNTHETIC_2000_PATH = `${SYNTHETIC_DIR}/synthetic-2000p.pdf`; + /** * Load a fixture file as bytes. */ @@ -65,6 +74,90 @@ export async function getLargePdf(): Promise { return loadFixture(LARGE_PDF_FALLBACK); } +/** + * Build a synthetic PDF with the given number of pages by copying + * pages from sample.pdf. Each page gets unique text to simulate + * real-world content variation. + */ +async function buildSyntheticPdf(pageCount: number): Promise { + const sourceBytes = await loadFixture(mediumPdfPath); + const source = await PDF.load(sourceBytes); + const sourcePageCount = source.getPageCount(); + + // Start by copying the source pages + const pdf = await PDF.load(sourceBytes); + + // Copy pages from source repeatedly until we reach the target count + const pagesNeeded = pageCount - sourcePageCount; + + if (pagesNeeded > 0) { + // Build an array of source page indices to copy in bulk + const indices: number[] = []; + + for (let i = 0; i < pagesNeeded; i++) { + indices.push(i % sourcePageCount); + } + + await pdf.copyPagesFrom(source, indices); + } + + // Add unique text to each page so content varies + for (let i = 0; i < pdf.getPageCount(); i++) { + const page = pdf.getPage(i); + + if (page) { + page.drawText(`Page ${i + 1} of ${pageCount}`, { + x: 50, + y: 20, + font: "Helvetica", + size: 8, + }); + } + } + + return pdf.save(); +} + +/** + * Get or create a synthetic PDF cached to disk. + */ +async function getOrCreateSynthetic(path: string, pageCount: number): Promise { + if (existsSync(path)) { + return loadFixture(path); + } + + console.log(`Building synthetic ${pageCount}-page PDF...`); + const start = performance.now(); + + const bytes = await buildSyntheticPdf(pageCount); + + mkdirSync(SYNTHETIC_DIR, { recursive: true }); + writeFileSync(path, bytes); + + const elapsed = ((performance.now() - start) / 1000).toFixed(1); + const size = (bytes.length / 1024 / 1024).toFixed(1); + + console.log(`Cached ${pageCount}-page PDF to ${path} (${size}MB) in ${elapsed}s`); + + return bytes; +} + +/** + * Get a synthetic 100-page PDF. + * Built by copying pages from sample.pdf. Cached on disk after first build. + */ +export async function getSynthetic100(): Promise { + return getOrCreateSynthetic(SYNTHETIC_100_PATH, 100); +} + +/** + * Get a synthetic 2000-page PDF. + * Built by copying pages from sample.pdf. Cached on disk after first build. + */ +export async function getSynthetic2000(): Promise { + return getOrCreateSynthetic(SYNTHETIC_2000_PATH, 2000); +} + // Pre-load common fixtures export const smallPdfPath = "fixtures/basic/rot0.pdf"; export const mediumPdfPath = "fixtures/basic/sample.pdf"; diff --git a/benchmarks/splitting.bench.ts b/benchmarks/splitting.bench.ts new file mode 100644 index 0000000..23f5708 --- /dev/null +++ b/benchmarks/splitting.bench.ts @@ -0,0 +1,119 @@ +/** + * PDF page-splitting benchmarks. 
+ * + * Tests the performance of splitting a PDF into individual single-page PDFs. + * This is the primary benchmark requested in issue #26 for users who need + * to split 2000+ page documents at high throughput. + * + * Scenarios: + * - Extract single page (baseline) + * - Split 100-page PDF into individual pages + * - Split 2000-page PDF into individual pages + */ + +import { bench, describe } from "vitest"; + +import { PDF } from "../src"; +import { getSynthetic100, getSynthetic2000, loadFixture, mediumPdfPath } from "./fixtures"; + +// Pre-load fixtures outside benchmarks to isolate I/O from measurements +const mediumPdf = await loadFixture(mediumPdfPath); +const synthetic100 = await getSynthetic100(); +const synthetic2000 = await getSynthetic2000(); + +// ───────────────────────────────────────────────────────────────────────────── +// Single page extraction (baseline) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Extract single page", () => { + bench("extractPages (1 page from small PDF)", async () => { + const pdf = await PDF.load(mediumPdf); + const extracted = await pdf.extractPages([0]); + await extracted.save(); + }); + + bench("extractPages (1 page from 100-page PDF)", async () => { + const pdf = await PDF.load(synthetic100); + const extracted = await pdf.extractPages([0]); + await extracted.save(); + }); + + bench("extractPages (1 page from 2000-page PDF)", async () => { + const pdf = await PDF.load(synthetic2000); + const extracted = await pdf.extractPages([0]); + await extracted.save(); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Full split: every page into its own PDF +// ───────────────────────────────────────────────────────────────────────────── + +describe("Split into single-page PDFs", () => { + bench( + `split 100-page PDF (${(synthetic100.length / 1024 / 1024).toFixed(1)}MB)`, + async () => { + const pdf = await PDF.load(synthetic100); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const single = await pdf.extractPages([i]); + await single.save(); + } + }, + { warmupIterations: 1, iterations: 3 }, + ); + + bench( + `split 2000-page PDF (${(synthetic2000.length / 1024 / 1024).toFixed(1)}MB)`, + async () => { + const pdf = await PDF.load(synthetic2000); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const single = await pdf.extractPages([i]); + await single.save(); + } + }, + { warmupIterations: 0, iterations: 1, time: 0 }, + ); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Batch extraction: extract ranges of pages +// ───────────────────────────────────────────────────────────────────────────── + +describe("Batch page extraction", () => { + bench( + "extract first 10 pages from 2000-page PDF", + async () => { + const pdf = await PDF.load(synthetic2000); + const indices = Array.from({ length: 10 }, (_, i) => i); + const extracted = await pdf.extractPages(indices); + await extracted.save(); + }, + { warmupIterations: 1, iterations: 5 }, + ); + + bench( + "extract first 100 pages from 2000-page PDF", + async () => { + const pdf = await PDF.load(synthetic2000); + const indices = Array.from({ length: 100 }, (_, i) => i); + const extracted = await pdf.extractPages(indices); + await extracted.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); + + bench( + "extract every 10th page from 2000-page PDF (200 pages)", + async () => { + const pdf = 
await PDF.load(synthetic2000); + const indices = Array.from({ length: 200 }, (_, i) => i * 10); + const extracted = await pdf.extractPages(indices); + await extracted.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); diff --git a/package.json b/package.json index b511e67..230f00b 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ }, "scripts": { "bench": "vitest bench", + "bench:report": "bun run scripts/bench-report.ts", "build": "tsdown", "docs:build": "bun run --cwd apps/docs build", "docs:dev": "bun run --cwd apps/docs dev", diff --git a/reports/benchmarks.md b/reports/benchmarks.md new file mode 100644 index 0000000..b0b0010 --- /dev/null +++ b/reports/benchmarks.md @@ -0,0 +1,235 @@ +# Benchmark Report + +> Generated on 2026-02-16 at 12:50:00 UTC +> +> System: darwin | Apple M4 Pro (12 cores) | 24GB RAM | Bun 1.3.5 + +--- + +## Contents + +- [Comparison](#comparison) +- [Copying](#copying) +- [Drawing](#drawing) +- [Forms](#forms) +- [Loading](#loading) +- [Saving](#saving) +- [Splitting](#splitting) + +## Comparison + +### Load PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 895.0 | 1.12ms | 1.59ms | ±1.07% | 448 | +| pdf-lib | 36.7 | 27.21ms | 29.03ms | ±1.90% | 19 | + +- **libpdf** is 24.35x faster than pdf-lib + +### Create blank PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ---: | ----: | -----: | ------: | +| libpdf | 38.6K | 26us | 45us | ±0.84% | 19,283 | +| pdf-lib | 10.3K | 97us | 461us | ±1.82% | 5,172 | + +- **libpdf** is 3.73x faster than pdf-lib + +### Add 10 pages + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ----: | ----: | -----: | ------: | +| libpdf | 19.1K | 52us | 87us | ±0.91% | 9,562 | +| pdf-lib | 6.3K | 158us | 770us | ±2.55% | 3,173 | + +- **libpdf** is 3.01x faster than pdf-lib + +### Draw 50 rectangles + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | -----: | -----: | -----: | ------: | +| pdf-lib | 2.2K | 458us | 1.80ms | ±3.80% | 1,093 | +| libpdf | 627.4 | 1.59ms | 2.30ms | ±1.44% | 314 | + +- **pdf-lib** is 3.48x faster than libpdf + +### Load and save PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 909.3 | 1.10ms | 1.48ms | ±0.88% | 456 | +| pdf-lib | 22.0 | 45.45ms | 58.21ms | ±6.77% | 11 | + +- **libpdf** is 41.33x faster than pdf-lib + +### Load, modify, and save PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 37.9 | 26.38ms | 33.50ms | ±5.76% | 20 | +| pdf-lib | 23.1 | 43.25ms | 44.89ms | ±1.52% | 12 | + +- **libpdf** is 1.64x faster than pdf-lib + +### Extract single page from 100-page PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | -----: | -----: | -----: | ------: | +| libpdf | 503.4 | 1.99ms | 3.10ms | ±1.55% | 252 | +| pdf-lib | 155.9 | 6.41ms | 7.44ms | ±1.53% | 79 | + +- **libpdf** is 3.23x faster than pdf-lib + +### Split 100-page PDF into single-page PDFs + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 35.3 | 28.29ms | 29.79ms | ±1.35% | 18 | +| pdf-lib | 35.0 | 28.58ms | 32.30ms | ±3.07% | 18 | + +- **libpdf** is 1.01x faster than pdf-lib + +### Copy 10 pages between documents + +| Benchmark | ops/sec | Mean | p99 | RME | 
Samples | +| :-------- | ------: | -----: | ------: | -----: | ------: | +| libpdf | 334.0 | 2.99ms | 3.55ms | ±1.12% | 168 | +| pdf-lib | 103.7 | 9.64ms | 14.95ms | ±3.70% | 52 | + +- **libpdf** is 3.22x faster than pdf-lib + +### Merge 2 x 100-page PDFs + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 47.1 | 21.23ms | 24.31ms | ±2.08% | 24 | +| pdf-lib | 22.9 | 43.64ms | 47.48ms | ±2.23% | 12 | + +- **libpdf** is 2.06x faster than pdf-lib + +## Copying + +### Copy pages between documents + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :------------------------------ | ------: | ------: | ------: | -----: | ------: | +| copy 1 page | 2.3K | 429us | 735us | ±1.26% | 1,166 | +| copy 10 pages from 100-page PDF | 344.1 | 2.91ms | 3.57ms | ±1.12% | 173 | +| copy all 100 pages | 92.3 | 10.84ms | 13.86ms | ±1.96% | 47 | + +- **copy 1 page** is 6.78x faster than copy 10 pages from 100-page PDF +- **copy 1 page** is 25.27x faster than copy all 100 pages + +### Duplicate pages within same document + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------------------------------- | ------: | ----: | ----: | -----: | ------: | +| duplicate all pages (double the document) | 2.2K | 461us | 798us | ±0.89% | 1,086 | +| duplicate page 0 | 2.2K | 464us | 758us | ±0.77% | 1,078 | + +- **duplicate all pages (double the document)** is 1.01x faster than duplicate page 0 + +### Merge PDFs + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------------- | ------: | ------: | ------: | -----: | ------: | +| merge 2 small PDFs | 1.4K | 712us | 1.12ms | ±1.00% | 702 | +| merge 10 small PDFs | 254.5 | 3.93ms | 5.73ms | ±2.08% | 128 | +| merge 2 x 100-page PDFs | 48.6 | 20.58ms | 26.32ms | ±3.33% | 25 | + +- **merge 2 small PDFs** is 5.51x faster than merge 10 small PDFs +- **merge 2 small PDFs** is 28.89x faster than merge 2 x 100-page PDFs + +## Drawing + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------------------------- | ------: | -----: | -----: | -----: | ------: | +| draw 100 lines | 399.8 | 2.50ms | 2.74ms | ±0.51% | 200 | +| draw 100 rectangles | 360.3 | 2.78ms | 3.55ms | ±1.22% | 181 | +| draw 100 circles | 279.7 | 3.58ms | 4.42ms | ±1.33% | 140 | +| draw 100 text lines (standard font) | 259.5 | 3.85ms | 4.29ms | ±0.61% | 130 | +| create 10 pages with mixed content | 193.0 | 5.18ms | 6.40ms | ±1.35% | 97 | + +- **draw 100 lines** is 1.11x faster than draw 100 rectangles +- **draw 100 lines** is 1.43x faster than draw 100 circles +- **draw 100 lines** is 1.54x faster than draw 100 text lines (standard font) +- **draw 100 lines** is 2.07x faster than create 10 pages with mixed content + +## Forms + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------- | ------: | -----: | -----: | -----: | ------: | +| read field values | 702.6 | 1.42ms | 1.85ms | ±0.97% | 352 | +| get form fields | 677.3 | 1.48ms | 2.36ms | ±1.45% | 339 | +| flatten form | 198.8 | 5.03ms | 5.84ms | ±1.28% | 100 | +| fill text fields | 155.1 | 6.45ms | 7.36ms | ±1.25% | 78 | + +- **read field values** is 1.04x faster than get form fields +- **read field values** is 3.53x faster than flatten form +- **read field values** is 4.53x faster than fill text fields + +## Loading + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :--------------------- | ------: | -----: | -----: | -----: | ------: | +| load small PDF (888B) | 38.7K | 26us | 37us | ±1.10% | 19,336 | +| 
load medium PDF (19KB) | 23.8K | 42us | 54us | ±0.84% | 11,904 | +| load form PDF (116KB) | 1.6K | 639us | 1.07ms | ±0.96% | 782 | +| load heavy PDF (9.9MB) | 909.7 | 1.10ms | 1.46ms | ±0.81% | 455 | + +- **load small PDF (888B)** is 1.62x faster than load medium PDF (19KB) +- **load small PDF (888B)** is 24.73x faster than load form PDF (116KB) +- **load small PDF (888B)** is 42.51x faster than load heavy PDF (9.9MB) + +## Saving + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :--------------------------------- | ------: | -----: | -----: | -----: | ------: | +| save unmodified (19KB) | 22.2K | 45us | 63us | ±0.78% | 11,103 | +| incremental save (19KB) | 6.9K | 144us | 373us | ±1.14% | 3,461 | +| save with modifications (19KB) | 2.4K | 422us | 799us | ±0.97% | 1,185 | +| save heavy PDF (9.9MB) | 850.9 | 1.18ms | 1.58ms | ±0.91% | 426 | +| incremental save heavy PDF (9.9MB) | 494.7 | 2.02ms | 2.37ms | ±0.74% | 248 | + +- **save unmodified (19KB)** is 3.21x faster than incremental save (19KB) +- **save unmodified (19KB)** is 9.37x faster than save with modifications (19KB) +- **save unmodified (19KB)** is 26.10x faster than save heavy PDF (9.9MB) +- **save unmodified (19KB)** is 44.89x faster than incremental save heavy PDF (9.9MB) + +## Splitting + +### Extract single page + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :--------------------------------------- | ------: | ------: | ------: | -----: | ------: | +| extractPages (1 page from small PDF) | 2.2K | 452us | 931us | ±1.63% | 1,106 | +| extractPages (1 page from 100-page PDF) | 536.3 | 1.86ms | 3.04ms | ±1.54% | 269 | +| extractPages (1 page from 2000-page PDF) | 24.7 | 40.43ms | 42.46ms | ±2.46% | 13 | + +- **extractPages (1 page from small PDF)** is 4.12x faster than extractPages (1 page from 100-page PDF) +- **extractPages (1 page from small PDF)** is 89.36x faster than extractPages (1 page from 2000-page PDF) + +### Split into single-page PDFs + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------------------------- | ------: | -------: | -------: | -----: | ------: | +| split 100-page PDF (0.1MB) | 32.2 | 31.02ms | 35.44ms | ±2.72% | 17 | +| split 2000-page PDF (0.9MB) | 1.8 | 550.66ms | 550.66ms | ±0.00% | 1 | + +- **split 100-page PDF (0.1MB)** is 17.75x faster than split 2000-page PDF (0.9MB) + +### Batch page extraction + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :----------------------------------------------------- | ------: | ------: | ------: | -----: | ------: | +| extract first 10 pages from 2000-page PDF | 23.3 | 42.90ms | 46.80ms | ±3.59% | 12 | +| extract first 100 pages from 2000-page PDF | 20.1 | 49.79ms | 52.72ms | ±2.98% | 11 | +| extract every 10th page from 2000-page PDF (200 pages) | 18.6 | 53.74ms | 59.30ms | ±3.19% | 10 | + +- **extract first 10 pages from 2000-page PDF** is 1.16x faster than extract first 100 pages from 2000-page PDF +- **extract first 10 pages from 2000-page PDF** is 1.25x faster than extract every 10th page from 2000-page PDF (200 pages) + +--- + +_Results are machine-dependent. Use for relative comparison only._ diff --git a/scripts/bench-report.ts b/scripts/bench-report.ts new file mode 100644 index 0000000..d16617d --- /dev/null +++ b/scripts/bench-report.ts @@ -0,0 +1,262 @@ +/** + * Benchmark report generator. + * + * Runs vitest bench with JSON output, then transforms the results + * into a markdown report saved to reports/benchmarks.md. 
+ * + * Usage: + * bun run scripts/bench-report.ts + * bun run scripts/bench-report.ts --json-only # Just dump JSON, skip markdown + * bun run scripts/bench-report.ts --from-json results.json # Generate from existing JSON + */ + +import { execSync } from "node:child_process"; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { cpus, platform, totalmem } from "node:os"; + +// ───────────────────────────────────────────────────────────────────────────── +// Types for vitest bench JSON output +// ───────────────────────────────────────────────────────────────────────────── + +interface BenchmarkResult { + name: string; + rank: number; + rme: number; + hz: number; + min: number; + max: number; + mean: number; + p75: number; + p99: number; + p995: number; + p999: number; + sampleCount: number; + median: number; +} + +interface BenchmarkGroup { + fullName: string; + benchmarks: BenchmarkResult[]; +} + +interface BenchmarkFile { + filepath: string; + groups: BenchmarkGroup[]; +} + +interface BenchmarkOutput { + files: BenchmarkFile[]; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +function formatHz(hz: number): string { + if (hz >= 1000) { + return `${(hz / 1000).toFixed(1)}K`; + } + + if (hz >= 1) { + return hz.toFixed(1); + } + + return hz.toFixed(3); +} + +function formatTime(ms: number): string { + if (ms >= 1000) { + return `${(ms / 1000).toFixed(2)}s`; + } + + if (ms >= 1) { + return `${ms.toFixed(2)}ms`; + } + + return `${(ms * 1000).toFixed(0)}us`; +} + +function formatRme(rme: number): string { + return `\u00b1${rme.toFixed(2)}%`; +} + +function getSystemInfo(): string { + const cpu = cpus()[0]; + const cpuModel = cpu?.model ?? "Unknown CPU"; + const cpuCount = cpus().length; + const mem = (totalmem() / 1024 / 1024 / 1024).toFixed(0); + const os = platform(); + const runtime = `Bun ${process.versions.bun ?? "unknown"}`; + + return `${os} | ${cpuModel} (${cpuCount} cores) | ${mem}GB RAM | ${runtime}`; +} + +/** + * Extract a short file label from a benchmark filepath. + * e.g. 
"/Users/.../benchmarks/loading.bench.ts" -> "Loading" + */ +function fileLabel(filepath: string): string { + const match = filepath.match(/([^/]+)\.bench\.ts$/); + + if (!match) { + return filepath; + } + + const name = match[1]; + + return name.charAt(0).toUpperCase() + name.slice(1); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Markdown generation +// ───────────────────────────────────────────────────────────────────────────── + +function generateMarkdown(data: BenchmarkOutput): string { + const lines: string[] = []; + const now = new Date(); + const dateStr = now.toISOString().split("T")[0]; + const timeStr = now.toISOString().split("T")[1].split(".")[0]; + + lines.push("# Benchmark Report"); + lines.push(""); + lines.push(`> Generated on ${dateStr} at ${timeStr} UTC`); + lines.push(`>`); + lines.push(`> System: ${getSystemInfo()}`); + lines.push(""); + lines.push("---"); + lines.push(""); + + // Table of contents + lines.push("## Contents"); + lines.push(""); + + for (const file of data.files) { + const label = fileLabel(file.filepath); + const anchor = label.toLowerCase().replace(/\s+/g, "-"); + lines.push(`- [${label}](#${anchor})`); + } + + lines.push(""); + + // Each file becomes a section + for (const file of data.files) { + const label = fileLabel(file.filepath); + lines.push(`## ${label}`); + lines.push(""); + + for (const group of file.groups) { + // If the group name differs from the file-level name, add a subheading + const groupName = group.fullName.replace(/^benchmarks\/[^>]+> /, "").trim(); + const isTopLevel = group.benchmarks.length > 0 && !groupName.includes(" > "); + + // Check if this group has a describe() wrapper (indicated by " > " in fullName) + const describeName = group.fullName.includes(" > ") + ? group.fullName.split(" > ").slice(1).join(" > ") + : null; + + if (describeName) { + lines.push(`### ${describeName}`); + lines.push(""); + } + + // Build the results table + lines.push("| Benchmark | ops/sec | Mean | p99 | RME | Samples |"); + lines.push("|:---|---:|---:|---:|---:|---:|"); + + // Sort by rank + const sorted = [...group.benchmarks].sort((a, b) => a.rank - b.rank); + + for (const bench of sorted) { + const name = bench.name; + const hz = formatHz(bench.hz); + const mean = formatTime(bench.mean); + const p99 = formatTime(bench.p99); + const rme = formatRme(bench.rme); + const samples = bench.sampleCount.toLocaleString(); + + lines.push(`| ${name} | ${hz} | ${mean} | ${p99} | ${rme} | ${samples} |`); + } + + lines.push(""); + + // Add comparison summary for groups with multiple benchmarks + if (sorted.length >= 2) { + const fastest = sorted[0]; + const rest = sorted.slice(1); + + for (const slower of rest) { + const ratio = (fastest.hz / slower.hz).toFixed(2); + lines.push(`- **${fastest.name}** is ${ratio}x faster than ${slower.name}`); + } + + lines.push(""); + } + } + } + + // Footer + lines.push("---"); + lines.push(""); + lines.push("*Results are machine-dependent. 
Use for relative comparison only.*"); + lines.push(""); + + return lines.join("\n"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Main +// ───────────────────────────────────────────────────────────────────────────── + +const args = process.argv.slice(2); +const jsonOnly = args.includes("--json-only"); +const fromJsonIdx = args.indexOf("--from-json"); + +const jsonPath = "reports/bench-results.json"; +const mdPath = "reports/benchmarks.md"; + +mkdirSync("reports", { recursive: true }); + +let data: BenchmarkOutput; + +if (fromJsonIdx !== -1 && args[fromJsonIdx + 1]) { + // Generate markdown from an existing JSON file + const inputPath = args[fromJsonIdx + 1]; + + if (!existsSync(inputPath)) { + console.error(`File not found: ${inputPath}`); + process.exit(1); + } + + data = JSON.parse(readFileSync(inputPath, "utf-8")) as BenchmarkOutput; + console.log(`Loaded benchmark results from ${inputPath}`); +} else { + // Run benchmarks and capture JSON + console.log("Running benchmarks...\n"); + + try { + execSync(`bun run bench -- --outputJson ${jsonPath}`, { + stdio: "inherit", + timeout: 600_000, // 10 minute timeout + }); + } catch (error) { + console.error("Benchmark run failed"); + process.exit(1); + } + + if (!existsSync(jsonPath)) { + console.error(`Expected JSON output at ${jsonPath} but file not found`); + process.exit(1); + } + + data = JSON.parse(readFileSync(jsonPath, "utf-8")) as BenchmarkOutput; + console.log(`\nBenchmark JSON saved to ${jsonPath}`); +} + +if (jsonOnly) { + process.exit(0); +} + +// Generate and write markdown report +const md = generateMarkdown(data); +writeFileSync(mdPath, md); +console.log(`Benchmark report saved to ${mdPath}`);
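
---

For anyone reviewing the split path this change benchmarks, the core loop can also be exercised standalone with the helpers added here. A minimal sketch, assuming the file lives at the repository root; the `tmp/split` output path and zero-padded filenames are illustrative and not part of this change:

```ts
import { mkdirSync, writeFileSync } from "node:fs";

import { getSynthetic2000 } from "./benchmarks/fixtures";
import { PDF } from "./src";

// First call builds and caches fixtures/benchmarks/synthetic-2000p.pdf on disk.
const bytes = await getSynthetic2000();

// Same loop that splitting.bench.ts times: one single-page PDF per source page.
const pdf = await PDF.load(bytes);

mkdirSync("tmp/split", { recursive: true }); // illustrative output directory

for (let i = 0; i < pdf.getPageCount(); i++) {
  const single = await pdf.extractPages([i]);
  writeFileSync(`tmp/split/page-${String(i + 1).padStart(4, "0")}.pdf`, await single.save());
}
```

The same fixture helpers back `bun run bench:report`, which runs the suites through `vitest bench`, writes `reports/bench-results.json` (now gitignored as machine-specific), and regenerates `reports/benchmarks.md` for the CI workflow to commit.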