From adecd18bf733d9758237e21b5b8213588a8719f0 Mon Sep 17 00:00:00 2001 From: Lucas Smith Date: Mon, 16 Feb 2026 23:58:15 +1100 Subject: [PATCH 1/9] feat(bench): add page splitting/copying benchmarks and markdown report Add benchmarks for page splitting, copying, and merging (#26). Synthetic 100-page and 2000-page PDFs are generated from sample.pdf and cached to disk for reuse. New benchmark suites: - splitting.bench.ts: single-page extraction, full split, batch extract - copying.bench.ts: cross-doc copy, duplication, merging - comparison.bench.ts: head-to-head vs pdf-lib for all of the above Report generation: - scripts/bench-report.ts transforms vitest JSON output to markdown - reports/benchmarks.md committed to repo, updated by CI - .github/workflows/bench.yml runs weekly + on push to main --- .github/workflows/bench.yml | 56 +++++++ .gitignore | 3 + benchmarks/comparison.bench.ts | 148 ++++++++++++++++++- benchmarks/copying.bench.ts | 94 ++++++++++++ benchmarks/fixtures.ts | 95 +++++++++++- benchmarks/splitting.bench.ts | 119 +++++++++++++++ package.json | 1 + reports/benchmarks.md | 235 +++++++++++++++++++++++++++++ scripts/bench-report.ts | 262 +++++++++++++++++++++++++++++++++ 9 files changed, 1010 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/bench.yml create mode 100644 benchmarks/copying.bench.ts create mode 100644 benchmarks/splitting.bench.ts create mode 100644 reports/benchmarks.md create mode 100644 scripts/bench-report.ts diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml new file mode 100644 index 0000000..cae6a1f --- /dev/null +++ b/.github/workflows/bench.yml @@ -0,0 +1,56 @@ +name: Benchmarks + +on: + # Run on pushes to main (to keep report up to date) + push: + branches: [main] + # Run weekly on Mondays at 06:00 UTC + schedule: + - cron: "0 6 * * 1" + # Allow manual trigger + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + 
contents: write + +jobs: + bench: + name: Run Benchmarks + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Run benchmarks and generate report + run: bun run bench:report + + - name: Upload JSON results + uses: actions/upload-artifact@v4 + with: + name: bench-results + path: reports/bench-results.json + retention-days: 90 + + - name: Commit updated report + run: | + git config user.name "github-actions[bot]" + git config user.email "github-actions[bot]@users.noreply.github.com" + + git add reports/benchmarks.md + + if git diff --staged --quiet; then + echo "No changes to benchmark report" + else + git commit -m "docs: update benchmark report" + git push + fi diff --git a/.gitignore b/.gitignore index 0d003c2..fd26d33 100644 --- a/.gitignore +++ b/.gitignore @@ -52,5 +52,8 @@ debug/ fixtures/benchmarks/ fixtures/private/ +# Benchmark JSON results (machine-specific) +reports/bench-results.json + # Temporary files tmp/ diff --git a/benchmarks/comparison.bench.ts b/benchmarks/comparison.bench.ts index 255a57d..355c33d 100644 --- a/benchmarks/comparison.bench.ts +++ b/benchmarks/comparison.bench.ts @@ -9,10 +9,12 @@ import { PDFDocument } from "pdf-lib"; import { bench, describe } from "vitest"; import { PDF } from "../src"; -import { loadFixture, getHeavyPdf } from "./fixtures"; +import { getHeavyPdf, getSynthetic100, getSynthetic2000, loadFixture } from "./fixtures"; -// Pre-load fixture +// Pre-load fixtures const pdfBytes = await getHeavyPdf(); +const synthetic100 = await getSynthetic100(); +const synthetic2000 = await getSynthetic2000(); describe("Load PDF", () => { bench("libpdf", async () => { @@ -119,3 +121,145 @@ describe("Load, modify, and save PDF", () => { await pdf.save(); }); }); + +// ───────────────────────────────────────────────────────────────────────────── +// Page splitting comparison 
(issue #26) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Extract single page from 100-page PDF", () => { + bench("libpdf", async () => { + const pdf = await PDF.load(synthetic100); + const extracted = await pdf.extractPages([0]); + await extracted.save(); + }); + + bench("pdf-lib", async () => { + const pdf = await PDFDocument.load(synthetic100); + const newDoc = await PDFDocument.create(); + const [page] = await newDoc.copyPages(pdf, [0]); + newDoc.addPage(page); + await newDoc.save(); + }); +}); + +describe("Split 100-page PDF into single-page PDFs", () => { + bench( + "libpdf", + async () => { + const pdf = await PDF.load(synthetic100); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const single = await pdf.extractPages([i]); + await single.save(); + } + }, + { warmupIterations: 1, iterations: 3 }, + ); + + bench( + "pdf-lib", + async () => { + const pdf = await PDFDocument.load(synthetic100); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const newDoc = await PDFDocument.create(); + const [page] = await newDoc.copyPages(pdf, [i]); + newDoc.addPage(page); + await newDoc.save(); + } + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); + +describe(`Split 2000-page PDF into single-page PDFs (${(synthetic2000.length / 1024 / 1024).toFixed(1)}MB)`, () => { + bench( + "libpdf", + async () => { + const pdf = await PDF.load(synthetic2000); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const single = await pdf.extractPages([i]); + await single.save(); + } + }, + { warmupIterations: 0, iterations: 1, time: 0 }, + ); + + bench( + "pdf-lib", + async () => { + const pdf = await PDFDocument.load(synthetic2000); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const newDoc = await PDFDocument.create(); + const [page] = await newDoc.copyPages(pdf, [i]); + newDoc.addPage(page); + 
await newDoc.save(); + } + }, + { warmupIterations: 0, iterations: 1, time: 0 }, + ); +}); + +describe("Copy 10 pages between documents", () => { + bench("libpdf", async () => { + const source = await PDF.load(synthetic100); + const dest = PDF.create(); + const indices = Array.from({ length: 10 }, (_, i) => i); + await dest.copyPagesFrom(source, indices); + await dest.save(); + }); + + bench("pdf-lib", async () => { + const source = await PDFDocument.load(synthetic100); + const dest = await PDFDocument.create(); + const indices = Array.from({ length: 10 }, (_, i) => i); + const pages = await dest.copyPages(source, indices); + + for (const page of pages) { + dest.addPage(page); + } + + await dest.save(); + }); +}); + +describe("Merge 2 x 100-page PDFs", () => { + bench( + "libpdf", + async () => { + const merged = await PDF.merge([synthetic100, synthetic100]); + await merged.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); + + bench( + "pdf-lib", + async () => { + const doc1 = await PDFDocument.load(synthetic100); + const doc2 = await PDFDocument.load(synthetic100); + const merged = await PDFDocument.create(); + + const pages1 = await merged.copyPages(doc1, doc1.getPageIndices()); + + for (const page of pages1) { + merged.addPage(page); + } + + const pages2 = await merged.copyPages(doc2, doc2.getPageIndices()); + + for (const page of pages2) { + merged.addPage(page); + } + + await merged.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); diff --git a/benchmarks/copying.bench.ts b/benchmarks/copying.bench.ts new file mode 100644 index 0000000..034b6c2 --- /dev/null +++ b/benchmarks/copying.bench.ts @@ -0,0 +1,94 @@ +/** + * PDF page-copying and merging benchmarks. + * + * Tests the performance of copying pages between documents and merging + * multiple PDFs. These operations are closely related to splitting + * (issue #26) and represent the other side of the workflow. 
+ */ + +import { bench, describe } from "vitest"; + +import { PDF } from "../src"; +import { getSynthetic100, loadFixture, mediumPdfPath } from "./fixtures"; + +// Pre-load fixtures +const mediumPdf = await loadFixture(mediumPdfPath); +const synthetic100 = await getSynthetic100(); + +// ───────────────────────────────────────────────────────────────────────────── +// Page copying +// ───────────────────────────────────────────────────────────────────────────── + +describe("Copy pages between documents", () => { + bench("copy 1 page", async () => { + const source = await PDF.load(mediumPdf); + const dest = PDF.create(); + await dest.copyPagesFrom(source, [0]); + await dest.save(); + }); + + bench("copy 10 pages from 100-page PDF", async () => { + const source = await PDF.load(synthetic100); + const dest = PDF.create(); + const indices = Array.from({ length: 10 }, (_, i) => i); + await dest.copyPagesFrom(source, indices); + await dest.save(); + }); + + bench( + "copy all 100 pages", + async () => { + const source = await PDF.load(synthetic100); + const dest = PDF.create(); + const indices = Array.from({ length: 100 }, (_, i) => i); + await dest.copyPagesFrom(source, indices); + await dest.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Self-copy (page duplication) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Duplicate pages within same document", () => { + bench("duplicate page 0", async () => { + const pdf = await PDF.load(mediumPdf); + await pdf.copyPagesFrom(pdf, [0]); + await pdf.save(); + }); + + bench("duplicate all pages (double the document)", async () => { + const pdf = await PDF.load(mediumPdf); + const indices = Array.from({ length: pdf.getPageCount() }, (_, i) => i); + await pdf.copyPagesFrom(pdf, indices); + await pdf.save(); + }); +}); + +// 
───────────────────────────────────────────────────────────────────────────── +// Merging +// ───────────────────────────────────────────────────────────────────────────── + +describe("Merge PDFs", () => { + bench("merge 2 small PDFs", async () => { + const merged = await PDF.merge([mediumPdf, mediumPdf]); + await merged.save(); + }); + + bench("merge 10 small PDFs", async () => { + const sources = Array.from({ length: 10 }, () => mediumPdf); + const merged = await PDF.merge(sources); + await merged.save(); + }); + + bench( + "merge 2 x 100-page PDFs", + async () => { + const merged = await PDF.merge([synthetic100, synthetic100]); + await merged.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); diff --git a/benchmarks/fixtures.ts b/benchmarks/fixtures.ts index b18e21c..4cf11f2 100644 --- a/benchmarks/fixtures.ts +++ b/benchmarks/fixtures.ts @@ -1,12 +1,16 @@ /** * Benchmark fixture helpers. * - * Provides utilities for loading PDF fixtures for benchmarks. + * Provides utilities for loading PDF fixtures for benchmarks, + * including synthetic large PDFs built by copying pages from + * existing fixtures. */ import { existsSync, mkdirSync, writeFileSync } from "node:fs"; import { readFile } from "node:fs/promises"; +import { PDF } from "../src"; + // Heavy PDF - downloaded on first run (~10MB) const HEAVY_PDF_PATH = "fixtures/benchmarks/cc-journalists-guide.pdf"; const HEAVY_PDF_URL = @@ -15,6 +19,11 @@ const HEAVY_PDF_URL = // Fallback large PDF - use existing fixture from pdfbox malformed tests (2MB) const LARGE_PDF_FALLBACK = "fixtures/malformed/pdfbox/PDFBOX-3947.pdf"; +// Synthetic PDFs - generated on first run, cached locally +const SYNTHETIC_DIR = "fixtures/benchmarks"; +const SYNTHETIC_100_PATH = `${SYNTHETIC_DIR}/synthetic-100p.pdf`; +const SYNTHETIC_2000_PATH = `${SYNTHETIC_DIR}/synthetic-2000p.pdf`; + /** * Load a fixture file as bytes. 
*/ @@ -65,6 +74,90 @@ export async function getLargePdf(): Promise { return loadFixture(LARGE_PDF_FALLBACK); } +/** + * Build a synthetic PDF with the given number of pages by copying + * pages from sample.pdf. Each page gets unique text to simulate + * real-world content variation. + */ +async function buildSyntheticPdf(pageCount: number): Promise { + const sourceBytes = await loadFixture(mediumPdfPath); + const source = await PDF.load(sourceBytes); + const sourcePageCount = source.getPageCount(); + + // Start by copying the source pages + const pdf = await PDF.load(sourceBytes); + + // Copy pages from source repeatedly until we reach the target count + const pagesNeeded = pageCount - sourcePageCount; + + if (pagesNeeded > 0) { + // Build an array of source page indices to copy in bulk + const indices: number[] = []; + + for (let i = 0; i < pagesNeeded; i++) { + indices.push(i % sourcePageCount); + } + + await pdf.copyPagesFrom(source, indices); + } + + // Add unique text to each page so content varies + for (let i = 0; i < pdf.getPageCount(); i++) { + const page = pdf.getPage(i); + + if (page) { + page.drawText(`Page ${i + 1} of ${pageCount}`, { + x: 50, + y: 20, + font: "Helvetica", + size: 8, + }); + } + } + + return pdf.save(); +} + +/** + * Get or create a synthetic PDF cached to disk. + */ +async function getOrCreateSynthetic(path: string, pageCount: number): Promise { + if (existsSync(path)) { + return loadFixture(path); + } + + console.log(`Building synthetic ${pageCount}-page PDF...`); + const start = performance.now(); + + const bytes = await buildSyntheticPdf(pageCount); + + mkdirSync(SYNTHETIC_DIR, { recursive: true }); + writeFileSync(path, bytes); + + const elapsed = ((performance.now() - start) / 1000).toFixed(1); + const size = (bytes.length / 1024 / 1024).toFixed(1); + + console.log(`Cached ${pageCount}-page PDF to ${path} (${size}MB) in ${elapsed}s`); + + return bytes; +} + +/** + * Get a synthetic 100-page PDF. 
+ * Built by copying pages from sample.pdf. Cached on disk after first build. + */ +export async function getSynthetic100(): Promise { + return getOrCreateSynthetic(SYNTHETIC_100_PATH, 100); +} + +/** + * Get a synthetic 2000-page PDF. + * Built by copying pages from sample.pdf. Cached on disk after first build. + */ +export async function getSynthetic2000(): Promise { + return getOrCreateSynthetic(SYNTHETIC_2000_PATH, 2000); +} + // Pre-load common fixtures export const smallPdfPath = "fixtures/basic/rot0.pdf"; export const mediumPdfPath = "fixtures/basic/sample.pdf"; diff --git a/benchmarks/splitting.bench.ts b/benchmarks/splitting.bench.ts new file mode 100644 index 0000000..23f5708 --- /dev/null +++ b/benchmarks/splitting.bench.ts @@ -0,0 +1,119 @@ +/** + * PDF page-splitting benchmarks. + * + * Tests the performance of splitting a PDF into individual single-page PDFs. + * This is the primary benchmark requested in issue #26 for users who need + * to split 2000+ page documents at high throughput. 
+ * + * Scenarios: + * - Extract single page (baseline) + * - Split 100-page PDF into individual pages + * - Split 2000-page PDF into individual pages + */ + +import { bench, describe } from "vitest"; + +import { PDF } from "../src"; +import { getSynthetic100, getSynthetic2000, loadFixture, mediumPdfPath } from "./fixtures"; + +// Pre-load fixtures outside benchmarks to isolate I/O from measurements +const mediumPdf = await loadFixture(mediumPdfPath); +const synthetic100 = await getSynthetic100(); +const synthetic2000 = await getSynthetic2000(); + +// ───────────────────────────────────────────────────────────────────────────── +// Single page extraction (baseline) +// ───────────────────────────────────────────────────────────────────────────── + +describe("Extract single page", () => { + bench("extractPages (1 page from small PDF)", async () => { + const pdf = await PDF.load(mediumPdf); + const extracted = await pdf.extractPages([0]); + await extracted.save(); + }); + + bench("extractPages (1 page from 100-page PDF)", async () => { + const pdf = await PDF.load(synthetic100); + const extracted = await pdf.extractPages([0]); + await extracted.save(); + }); + + bench("extractPages (1 page from 2000-page PDF)", async () => { + const pdf = await PDF.load(synthetic2000); + const extracted = await pdf.extractPages([0]); + await extracted.save(); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Full split: every page into its own PDF +// ───────────────────────────────────────────────────────────────────────────── + +describe("Split into single-page PDFs", () => { + bench( + `split 100-page PDF (${(synthetic100.length / 1024 / 1024).toFixed(1)}MB)`, + async () => { + const pdf = await PDF.load(synthetic100); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const single = await pdf.extractPages([i]); + await single.save(); + } + }, + { warmupIterations: 1, iterations: 3 }, + ); + + bench( 
+ `split 2000-page PDF (${(synthetic2000.length / 1024 / 1024).toFixed(1)}MB)`, + async () => { + const pdf = await PDF.load(synthetic2000); + const pageCount = pdf.getPageCount(); + + for (let i = 0; i < pageCount; i++) { + const single = await pdf.extractPages([i]); + await single.save(); + } + }, + { warmupIterations: 0, iterations: 1, time: 0 }, + ); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// Batch extraction: extract ranges of pages +// ───────────────────────────────────────────────────────────────────────────── + +describe("Batch page extraction", () => { + bench( + "extract first 10 pages from 2000-page PDF", + async () => { + const pdf = await PDF.load(synthetic2000); + const indices = Array.from({ length: 10 }, (_, i) => i); + const extracted = await pdf.extractPages(indices); + await extracted.save(); + }, + { warmupIterations: 1, iterations: 5 }, + ); + + bench( + "extract first 100 pages from 2000-page PDF", + async () => { + const pdf = await PDF.load(synthetic2000); + const indices = Array.from({ length: 100 }, (_, i) => i); + const extracted = await pdf.extractPages(indices); + await extracted.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); + + bench( + "extract every 10th page from 2000-page PDF (200 pages)", + async () => { + const pdf = await PDF.load(synthetic2000); + const indices = Array.from({ length: 200 }, (_, i) => i * 10); + const extracted = await pdf.extractPages(indices); + await extracted.save(); + }, + { warmupIterations: 1, iterations: 3 }, + ); +}); diff --git a/package.json b/package.json index b511e67..230f00b 100644 --- a/package.json +++ b/package.json @@ -46,6 +46,7 @@ }, "scripts": { "bench": "vitest bench", + "bench:report": "bun run scripts/bench-report.ts", "build": "tsdown", "docs:build": "bun run --cwd apps/docs build", "docs:dev": "bun run --cwd apps/docs dev", diff --git a/reports/benchmarks.md b/reports/benchmarks.md new file mode 100644 index 
0000000..b0b0010 --- /dev/null +++ b/reports/benchmarks.md @@ -0,0 +1,235 @@ +# Benchmark Report + +> Generated on 2026-02-16 at 12:50:00 UTC +> +> System: darwin | Apple M4 Pro (12 cores) | 24GB RAM | Bun 1.3.5 + +--- + +## Contents + +- [Comparison](#comparison) +- [Copying](#copying) +- [Drawing](#drawing) +- [Forms](#forms) +- [Loading](#loading) +- [Saving](#saving) +- [Splitting](#splitting) + +## Comparison + +### Load PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 895.0 | 1.12ms | 1.59ms | ±1.07% | 448 | +| pdf-lib | 36.7 | 27.21ms | 29.03ms | ±1.90% | 19 | + +- **libpdf** is 24.35x faster than pdf-lib + +### Create blank PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ---: | ----: | -----: | ------: | +| libpdf | 38.6K | 26us | 45us | ±0.84% | 19,283 | +| pdf-lib | 10.3K | 97us | 461us | ±1.82% | 5,172 | + +- **libpdf** is 3.73x faster than pdf-lib + +### Add 10 pages + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ----: | ----: | -----: | ------: | +| libpdf | 19.1K | 52us | 87us | ±0.91% | 9,562 | +| pdf-lib | 6.3K | 158us | 770us | ±2.55% | 3,173 | + +- **libpdf** is 3.01x faster than pdf-lib + +### Draw 50 rectangles + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | -----: | -----: | -----: | ------: | +| pdf-lib | 2.2K | 458us | 1.80ms | ±3.80% | 1,093 | +| libpdf | 627.4 | 1.59ms | 2.30ms | ±1.44% | 314 | + +- **pdf-lib** is 3.48x faster than libpdf + +### Load and save PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 909.3 | 1.10ms | 1.48ms | ±0.88% | 456 | +| pdf-lib | 22.0 | 45.45ms | 58.21ms | ±6.77% | 11 | + +- **libpdf** is 41.33x faster than pdf-lib + +### Load, modify, and save PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | 
------: | -----: | ------: | +| libpdf | 37.9 | 26.38ms | 33.50ms | ±5.76% | 20 | +| pdf-lib | 23.1 | 43.25ms | 44.89ms | ±1.52% | 12 | + +- **libpdf** is 1.64x faster than pdf-lib + +### Extract single page from 100-page PDF + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | -----: | -----: | -----: | ------: | +| libpdf | 503.4 | 1.99ms | 3.10ms | ±1.55% | 252 | +| pdf-lib | 155.9 | 6.41ms | 7.44ms | ±1.53% | 79 | + +- **libpdf** is 3.23x faster than pdf-lib + +### Split 100-page PDF into single-page PDFs + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 35.3 | 28.29ms | 29.79ms | ±1.35% | 18 | +| pdf-lib | 35.0 | 28.58ms | 32.30ms | ±3.07% | 18 | + +- **libpdf** is 1.01x faster than pdf-lib + +### Copy 10 pages between documents + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | -----: | ------: | -----: | ------: | +| libpdf | 334.0 | 2.99ms | 3.55ms | ±1.12% | 168 | +| pdf-lib | 103.7 | 9.64ms | 14.95ms | ±3.70% | 52 | + +- **libpdf** is 3.22x faster than pdf-lib + +### Merge 2 x 100-page PDFs + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 47.1 | 21.23ms | 24.31ms | ±2.08% | 24 | +| pdf-lib | 22.9 | 43.64ms | 47.48ms | ±2.23% | 12 | + +- **libpdf** is 2.06x faster than pdf-lib + +## Copying + +### Copy pages between documents + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :------------------------------ | ------: | ------: | ------: | -----: | ------: | +| copy 1 page | 2.3K | 429us | 735us | ±1.26% | 1,166 | +| copy 10 pages from 100-page PDF | 344.1 | 2.91ms | 3.57ms | ±1.12% | 173 | +| copy all 100 pages | 92.3 | 10.84ms | 13.86ms | ±1.96% | 47 | + +- **copy 1 page** is 6.78x faster than copy 10 pages from 100-page PDF +- **copy 1 page** is 25.27x faster than copy all 100 pages + +### Duplicate pages within same document + 
+| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------------------------------- | ------: | ----: | ----: | -----: | ------: | +| duplicate all pages (double the document) | 2.2K | 461us | 798us | ±0.89% | 1,086 | +| duplicate page 0 | 2.2K | 464us | 758us | ±0.77% | 1,078 | + +- **duplicate all pages (double the document)** is 1.01x faster than duplicate page 0 + +### Merge PDFs + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------------- | ------: | ------: | ------: | -----: | ------: | +| merge 2 small PDFs | 1.4K | 712us | 1.12ms | ±1.00% | 702 | +| merge 10 small PDFs | 254.5 | 3.93ms | 5.73ms | ±2.08% | 128 | +| merge 2 x 100-page PDFs | 48.6 | 20.58ms | 26.32ms | ±3.33% | 25 | + +- **merge 2 small PDFs** is 5.51x faster than merge 10 small PDFs +- **merge 2 small PDFs** is 28.89x faster than merge 2 x 100-page PDFs + +## Drawing + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------------------------- | ------: | -----: | -----: | -----: | ------: | +| draw 100 lines | 399.8 | 2.50ms | 2.74ms | ±0.51% | 200 | +| draw 100 rectangles | 360.3 | 2.78ms | 3.55ms | ±1.22% | 181 | +| draw 100 circles | 279.7 | 3.58ms | 4.42ms | ±1.33% | 140 | +| draw 100 text lines (standard font) | 259.5 | 3.85ms | 4.29ms | ±0.61% | 130 | +| create 10 pages with mixed content | 193.0 | 5.18ms | 6.40ms | ±1.35% | 97 | + +- **draw 100 lines** is 1.11x faster than draw 100 rectangles +- **draw 100 lines** is 1.43x faster than draw 100 circles +- **draw 100 lines** is 1.54x faster than draw 100 text lines (standard font) +- **draw 100 lines** is 2.07x faster than create 10 pages with mixed content + +## Forms + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------- | ------: | -----: | -----: | -----: | ------: | +| read field values | 702.6 | 1.42ms | 1.85ms | ±0.97% | 352 | +| get form fields | 677.3 | 1.48ms | 2.36ms | ±1.45% | 339 | +| flatten form | 198.8 | 5.03ms | 5.84ms | ±1.28% | 100 | +| fill 
text fields | 155.1 | 6.45ms | 7.36ms | ±1.25% | 78 | + +- **read field values** is 1.04x faster than get form fields +- **read field values** is 3.53x faster than flatten form +- **read field values** is 4.53x faster than fill text fields + +## Loading + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :--------------------- | ------: | -----: | -----: | -----: | ------: | +| load small PDF (888B) | 38.7K | 26us | 37us | ±1.10% | 19,336 | +| load medium PDF (19KB) | 23.8K | 42us | 54us | ±0.84% | 11,904 | +| load form PDF (116KB) | 1.6K | 639us | 1.07ms | ±0.96% | 782 | +| load heavy PDF (9.9MB) | 909.7 | 1.10ms | 1.46ms | ±0.81% | 455 | + +- **load small PDF (888B)** is 1.62x faster than load medium PDF (19KB) +- **load small PDF (888B)** is 24.73x faster than load form PDF (116KB) +- **load small PDF (888B)** is 42.51x faster than load heavy PDF (9.9MB) + +## Saving + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :--------------------------------- | ------: | -----: | -----: | -----: | ------: | +| save unmodified (19KB) | 22.2K | 45us | 63us | ±0.78% | 11,103 | +| incremental save (19KB) | 6.9K | 144us | 373us | ±1.14% | 3,461 | +| save with modifications (19KB) | 2.4K | 422us | 799us | ±0.97% | 1,185 | +| save heavy PDF (9.9MB) | 850.9 | 1.18ms | 1.58ms | ±0.91% | 426 | +| incremental save heavy PDF (9.9MB) | 494.7 | 2.02ms | 2.37ms | ±0.74% | 248 | + +- **save unmodified (19KB)** is 3.21x faster than incremental save (19KB) +- **save unmodified (19KB)** is 9.37x faster than save with modifications (19KB) +- **save unmodified (19KB)** is 26.10x faster than save heavy PDF (9.9MB) +- **save unmodified (19KB)** is 44.89x faster than incremental save heavy PDF (9.9MB) + +## Splitting + +### Extract single page + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :--------------------------------------- | ------: | ------: | ------: | -----: | ------: | +| extractPages (1 page from small PDF) | 2.2K | 452us | 931us | ±1.63% | 1,106 | +| 
extractPages (1 page from 100-page PDF) | 536.3 | 1.86ms | 3.04ms | ±1.54% | 269 | +| extractPages (1 page from 2000-page PDF) | 24.7 | 40.43ms | 42.46ms | ±2.46% | 13 | + +- **extractPages (1 page from small PDF)** is 4.12x faster than extractPages (1 page from 100-page PDF) +- **extractPages (1 page from small PDF)** is 89.36x faster than extractPages (1 page from 2000-page PDF) + +### Split into single-page PDFs + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------------------------- | ------: | -------: | -------: | -----: | ------: | +| split 100-page PDF (0.1MB) | 32.2 | 31.02ms | 35.44ms | ±2.72% | 17 | +| split 2000-page PDF (0.9MB) | 1.8 | 550.66ms | 550.66ms | ±0.00% | 1 | + +- **split 100-page PDF (0.1MB)** is 17.75x faster than split 2000-page PDF (0.9MB) + +### Batch page extraction + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :----------------------------------------------------- | ------: | ------: | ------: | -----: | ------: | +| extract first 10 pages from 2000-page PDF | 23.3 | 42.90ms | 46.80ms | ±3.59% | 12 | +| extract first 100 pages from 2000-page PDF | 20.1 | 49.79ms | 52.72ms | ±2.98% | 11 | +| extract every 10th page from 2000-page PDF (200 pages) | 18.6 | 53.74ms | 59.30ms | ±3.19% | 10 | + +- **extract first 10 pages from 2000-page PDF** is 1.16x faster than extract first 100 pages from 2000-page PDF +- **extract first 10 pages from 2000-page PDF** is 1.25x faster than extract every 10th page from 2000-page PDF (200 pages) + +--- + +_Results are machine-dependent. Use for relative comparison only._ diff --git a/scripts/bench-report.ts b/scripts/bench-report.ts new file mode 100644 index 0000000..d16617d --- /dev/null +++ b/scripts/bench-report.ts @@ -0,0 +1,262 @@ +/** + * Benchmark report generator. + * + * Runs vitest bench with JSON output, then transforms the results + * into a markdown report saved to reports/benchmarks.md. 
+ * + * Usage: + * bun run scripts/bench-report.ts + * bun run scripts/bench-report.ts --json-only # Just dump JSON, skip markdown + * bun run scripts/bench-report.ts --from-json results.json # Generate from existing JSON + */ + +import { execSync } from "node:child_process"; +import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { cpus, platform, totalmem } from "node:os"; + +// ───────────────────────────────────────────────────────────────────────────── +// Types for vitest bench JSON output +// ───────────────────────────────────────────────────────────────────────────── + +interface BenchmarkResult { + name: string; + rank: number; + rme: number; + hz: number; + min: number; + max: number; + mean: number; + p75: number; + p99: number; + p995: number; + p999: number; + sampleCount: number; + median: number; +} + +interface BenchmarkGroup { + fullName: string; + benchmarks: BenchmarkResult[]; +} + +interface BenchmarkFile { + filepath: string; + groups: BenchmarkGroup[]; +} + +interface BenchmarkOutput { + files: BenchmarkFile[]; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +function formatHz(hz: number): string { + if (hz >= 1000) { + return `${(hz / 1000).toFixed(1)}K`; + } + + if (hz >= 1) { + return hz.toFixed(1); + } + + return hz.toFixed(3); +} + +function formatTime(ms: number): string { + if (ms >= 1000) { + return `${(ms / 1000).toFixed(2)}s`; + } + + if (ms >= 1) { + return `${ms.toFixed(2)}ms`; + } + + return `${(ms * 1000).toFixed(0)}us`; +} + +function formatRme(rme: number): string { + return `\u00b1${rme.toFixed(2)}%`; +} + +function getSystemInfo(): string { + const cpu = cpus()[0]; + const cpuModel = cpu?.model ?? 
"Unknown CPU"; + const cpuCount = cpus().length; + const mem = (totalmem() / 1024 / 1024 / 1024).toFixed(0); + const os = platform(); + const runtime = `Bun ${process.versions.bun ?? "unknown"}`; + + return `${os} | ${cpuModel} (${cpuCount} cores) | ${mem}GB RAM | ${runtime}`; +} + +/** + * Extract a short file label from a benchmark filepath. + * e.g. "/Users/.../benchmarks/loading.bench.ts" -> "Loading" + */ +function fileLabel(filepath: string): string { + const match = filepath.match(/([^/]+)\.bench\.ts$/); + + if (!match) { + return filepath; + } + + const name = match[1]; + + return name.charAt(0).toUpperCase() + name.slice(1); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Markdown generation +// ───────────────────────────────────────────────────────────────────────────── + +function generateMarkdown(data: BenchmarkOutput): string { + const lines: string[] = []; + const now = new Date(); + const dateStr = now.toISOString().split("T")[0]; + const timeStr = now.toISOString().split("T")[1].split(".")[0]; + + lines.push("# Benchmark Report"); + lines.push(""); + lines.push(`> Generated on ${dateStr} at ${timeStr} UTC`); + lines.push(`>`); + lines.push(`> System: ${getSystemInfo()}`); + lines.push(""); + lines.push("---"); + lines.push(""); + + // Table of contents + lines.push("## Contents"); + lines.push(""); + + for (const file of data.files) { + const label = fileLabel(file.filepath); + const anchor = label.toLowerCase().replace(/\s+/g, "-"); + lines.push(`- [${label}](#${anchor})`); + } + + lines.push(""); + + // Each file becomes a section + for (const file of data.files) { + const label = fileLabel(file.filepath); + lines.push(`## ${label}`); + lines.push(""); + + for (const group of file.groups) { + // If the group name differs from the file-level name, add a subheading + const groupName = group.fullName.replace(/^benchmarks\/[^>]+> /, "").trim(); + const isTopLevel = group.benchmarks.length > 0 && 
!groupName.includes(" > "); + + // Check if this group has a describe() wrapper (indicated by " > " in fullName) + const describeName = group.fullName.includes(" > ") + ? group.fullName.split(" > ").slice(1).join(" > ") + : null; + + if (describeName) { + lines.push(`### ${describeName}`); + lines.push(""); + } + + // Build the results table + lines.push("| Benchmark | ops/sec | Mean | p99 | RME | Samples |"); + lines.push("|:---|---:|---:|---:|---:|---:|"); + + // Sort by rank + const sorted = [...group.benchmarks].sort((a, b) => a.rank - b.rank); + + for (const bench of sorted) { + const name = bench.name; + const hz = formatHz(bench.hz); + const mean = formatTime(bench.mean); + const p99 = formatTime(bench.p99); + const rme = formatRme(bench.rme); + const samples = bench.sampleCount.toLocaleString(); + + lines.push(`| ${name} | ${hz} | ${mean} | ${p99} | ${rme} | ${samples} |`); + } + + lines.push(""); + + // Add comparison summary for groups with multiple benchmarks + if (sorted.length >= 2) { + const fastest = sorted[0]; + const rest = sorted.slice(1); + + for (const slower of rest) { + const ratio = (fastest.hz / slower.hz).toFixed(2); + lines.push(`- **${fastest.name}** is ${ratio}x faster than ${slower.name}`); + } + + lines.push(""); + } + } + } + + // Footer + lines.push("---"); + lines.push(""); + lines.push("*Results are machine-dependent. 
Use for relative comparison only.*"); + lines.push(""); + + return lines.join("\n"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Main +// ───────────────────────────────────────────────────────────────────────────── + +const args = process.argv.slice(2); +const jsonOnly = args.includes("--json-only"); +const fromJsonIdx = args.indexOf("--from-json"); + +const jsonPath = "reports/bench-results.json"; +const mdPath = "reports/benchmarks.md"; + +mkdirSync("reports", { recursive: true }); + +let data: BenchmarkOutput; + +if (fromJsonIdx !== -1 && args[fromJsonIdx + 1]) { + // Generate markdown from an existing JSON file + const inputPath = args[fromJsonIdx + 1]; + + if (!existsSync(inputPath)) { + console.error(`File not found: ${inputPath}`); + process.exit(1); + } + + data = JSON.parse(readFileSync(inputPath, "utf-8")) as BenchmarkOutput; + console.log(`Loaded benchmark results from ${inputPath}`); +} else { + // Run benchmarks and capture JSON + console.log("Running benchmarks...\n"); + + try { + execSync(`bun run bench -- --outputJson ${jsonPath}`, { + stdio: "inherit", + timeout: 600_000, // 10 minute timeout + }); + } catch (error) { + console.error("Benchmark run failed"); + process.exit(1); + } + + if (!existsSync(jsonPath)) { + console.error(`Expected JSON output at ${jsonPath} but file not found`); + process.exit(1); + } + + data = JSON.parse(readFileSync(jsonPath, "utf-8")) as BenchmarkOutput; + console.log(`\nBenchmark JSON saved to ${jsonPath}`); +} + +if (jsonOnly) { + process.exit(0); +} + +// Generate and write markdown report +const md = generateMarkdown(data); +writeFileSync(mdPath, md); +console.log(`Benchmark report saved to ${mdPath}`); From 2fe6842ba2b6d3724c4708a6a9b40e0a262e2fac Mon Sep 17 00:00:00 2001 From: Lucas Smith Date: Wed, 18 Feb 2026 10:51:02 +1100 Subject: [PATCH 2/9] perf(copier): make ObjectCopier fully synchronous MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit ObjectCopier does zero I/O — every method was async but never awaited anything asynchronous. Removing async/await eliminates microtask scheduling overhead on every recursive call in the deep-copy graph walk. Benchmarks show ~15% improvement on full-split workloads: - 100-page split: 31.6ms → 27.3ms (1.16x) - 2000-page split: 582.5ms → 506.6ms (1.15x) --- reports/baselines/baseline.json | 224 ++++++++++++++++++++ reports/baselines/comparison-sync-copier.md | 28 +++ reports/baselines/sync-copier.json | 224 ++++++++++++++++++++ src/api/pdf.ts | 6 +- src/document/object-copier.test.ts | 50 ++--- src/document/object-copier.ts | 46 ++-- 6 files changed, 529 insertions(+), 49 deletions(-) create mode 100644 reports/baselines/baseline.json create mode 100644 reports/baselines/comparison-sync-copier.md create mode 100644 reports/baselines/sync-copier.json diff --git a/reports/baselines/baseline.json b/reports/baselines/baseline.json new file mode 100644 index 0000000..bae2d4c --- /dev/null +++ b/reports/baselines/baseline.json @@ -0,0 +1,224 @@ +{ + "files": [ + { + "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", + "groups": [ + { + "fullName": "benchmarks/splitting.bench.ts > Extract single page", + "benchmarks": [ + { + "id": "-1072438084_0_0", + "name": "extractPages (1 page from small PDF)", + "rank": 1, + "rme": 1.598586232947195, + "samples": [], + "totalTime": 500.1328439999987, + "min": 0.3573750000000473, + "max": 2.4449999999999363, + "hz": 2215.411391778147, + "period": 0.45138343321299523, + "mean": 0.45138343321299523, + "variance": 0.015017269876550877, + "sd": 0.12254497083336745, + "sem": 0.0036815068475241517, + "df": 1107, + "critical": 1.96, + "moe": 0.007215753421147338, + "p75": 0.4394999999999527, + "p99": 0.8506250000000364, + "p995": 0.9532910000000356, + "p999": 1.7487909999999829, + "sampleCount": 1108, + "median": 0.4120829999999387 + }, + { + "id": "-1072438084_0_1", + "name": "extractPages (1 
page from 100-page PDF)", + "rank": 2, + "rme": 1.14326877952979, + "samples": [], + "totalTime": 500.9256660000017, + "min": 1.502375000000029, + "max": 2.347500000000082, + "hz": 560.9614740722809, + "period": 1.782653615658369, + "mean": 1.782653615658369, + "variance": 0.03038259004746126, + "sd": 0.17430602412843127, + "sem": 0.010398225630092402, + "df": 280, + "critical": 1.96, + "moe": 0.02038052223498111, + "p75": 1.8196669999999813, + "p99": 2.319291000000021, + "p995": 2.3412920000000668, + "p999": 2.347500000000082, + "sampleCount": 281, + "median": 1.7196670000000722 + }, + { + "id": "-1072438084_0_2", + "name": "extractPages (1 page from 2000-page PDF)", + "rank": 3, + "rme": 2.6124445200935758, + "samples": [], + "totalTime": 531.2322489999992, + "min": 37.85783300000003, + "max": 44.21758299999965, + "hz": 24.471405914214404, + "period": 40.864019153846094, + "mean": 40.864019153846094, + "variance": 3.120363595900889, + "sd": 1.7664550930892324, + "sem": 0.4899264933798263, + "df": 12, + "critical": 2.179, + "moe": 1.0675498290746415, + "p75": 42.14979199999971, + "p99": 44.21758299999965, + "p995": 44.21758299999965, + "p999": 44.21758299999965, + "sampleCount": 13, + "median": 40.22212499999978 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", + "benchmarks": [ + { + "id": "-1072438084_1_0", + "name": "split 100-page PDF (0.1MB)", + "rank": 1, + "rme": 1.8856591767197517, + "samples": [], + "totalTime": 506.2218300000013, + "min": 30.098917000000256, + "max": 34.55699999999979, + "hz": 31.60669700870063, + "period": 31.638864375000082, + "mean": 31.638864375000082, + "variance": 1.2540684871776795, + "sd": 1.119851993424881, + "sem": 0.27996299835622024, + "df": 15, + "critical": 2.131, + "moe": 0.5966011494971053, + "p75": 32.05224999999973, + "p99": 34.55699999999979, + "p995": 34.55699999999979, + "p999": 34.55699999999979, + "sampleCount": 16, + "median": 31.37264550000009 + }, + { + "id": 
"-1072438084_1_1", + "name": "split 2000-page PDF (0.9MB)", + "rank": 2, + "rme": 0, + "samples": [], + "totalTime": 582.5043749999995, + "min": 582.5043749999995, + "max": 582.5043749999995, + "hz": 1.716725303565318, + "period": 582.5043749999995, + "mean": 582.5043749999995, + "variance": 0, + "sd": 0, + "sem": 0, + "df": 0, + "critical": 12.71, + "moe": 0, + "p75": 582.5043749999995, + "p99": 582.5043749999995, + "p995": 582.5043749999995, + "p999": 582.5043749999995, + "sampleCount": 1, + "median": 582.5043749999995 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", + "benchmarks": [ + { + "id": "-1072438084_2_0", + "name": "extract first 10 pages from 2000-page PDF", + "rank": 1, + "rme": 3.094676818514817, + "samples": [], + "totalTime": 514.4472479999995, + "min": 39.471832999999606, + "max": 46.71299999999974, + "hz": 23.326006790107296, + "period": 42.87060399999996, + "mean": 42.87060399999996, + "variance": 4.360044973374979, + "sd": 2.0880720709245115, + "sem": 0.602774486117803, + "df": 11, + "critical": 2.201, + "moe": 1.3267066439452846, + "p75": 43.80012500000066, + "p99": 46.71299999999974, + "p995": 46.71299999999974, + "p999": 46.71299999999974, + "sampleCount": 12, + "median": 42.56633349999993 + }, + { + "id": "-1072438084_2_1", + "name": "extract first 100 pages from 2000-page PDF", + "rank": 2, + "rme": 3.2071455847378436, + "samples": [], + "totalTime": 509.16504200000236, + "min": 48.13025000000016, + "max": 54.86754199999996, + "hz": 19.639997201535987, + "period": 50.91650420000023, + "mean": 50.91650420000023, + "variance": 5.211578755405502, + "sd": 2.282888248558282, + "sem": 0.7219126509076774, + "df": 9, + "critical": 2.262, + "moe": 1.6329664163531663, + "p75": 52.17775000000074, + "p99": 54.86754199999996, + "p995": 54.86754199999996, + "p999": 54.86754199999996, + "sampleCount": 10, + "median": 51.348021000000244 + }, + { + "id": "-1072438084_2_2", + "name": "extract every 10th page from 
2000-page PDF (200 pages)", + "rank": 3, + "rme": 4.394613298511672, + "samples": [], + "totalTime": 529.0494579999995, + "min": 54.37287500000002, + "max": 64.11266699999942, + "hz": 17.011642038200534, + "period": 58.78327311111106, + "mean": 58.78327311111106, + "variance": 11.294649634264848, + "sd": 3.360751349663472, + "sem": 1.120250449887824, + "df": 8, + "critical": 2.306, + "moe": 2.5832975374413225, + "p75": 60.755959000000075, + "p99": 64.11266699999942, + "p995": 64.11266699999942, + "p999": 64.11266699999942, + "sampleCount": 9, + "median": 57.71983300000011 + } + ] + } + ] + } + ] +} diff --git a/reports/baselines/comparison-sync-copier.md b/reports/baselines/comparison-sync-copier.md new file mode 100644 index 0000000..8752df2 --- /dev/null +++ b/reports/baselines/comparison-sync-copier.md @@ -0,0 +1,28 @@ +# Benchmark Comparison: Sync ObjectCopier + +## Change + +Removed async/await from all ObjectCopier methods. The copier does zero I/O — +every method was async but never awaited anything asynchronous. Removing the +async overhead eliminates microtask scheduling on every recursive call in the +deep-copy graph walk. 
+ +## Results + +| Benchmark | Baseline (ms) | Sync (ms) | Speedup | +| :----------------------------------------------------- | ------------: | --------: | ------: | +| extractPages (1 page from small PDF) | 0.45 | 0.40 | 1.14x | +| extractPages (1 page from 100-page PDF) | 1.78 | 1.74 | 1.03x | +| extractPages (1 page from 2000-page PDF) | 40.86 | 41.04 | 1.00x | +| split 100-page PDF (0.1MB) | 31.64 | 27.35 | 1.16x | +| split 2000-page PDF (0.9MB) | 582.50 | 506.61 | 1.15x | +| extract first 10 pages from 2000-page PDF | 42.87 | 43.21 | 0.99x | +| extract first 100 pages from 2000-page PDF | 50.92 | 53.87 | 0.95x | +| extract every 10th page from 2000-page PDF (200 pages) | 58.78 | 56.65 | 1.04x | + +## Key Takeaways + +- **100-page split**: 31.6ms -> 27.3ms (1.16x faster) +- **2000-page split**: 582.5ms -> 506.6ms (1.15x faster) +- Single-page extraction from small PDFs: ~14% faster (0.45ms -> 0.40ms) +- Batch extraction noise is within margin of error for single runs diff --git a/reports/baselines/sync-copier.json b/reports/baselines/sync-copier.json new file mode 100644 index 0000000..7b6bd2f --- /dev/null +++ b/reports/baselines/sync-copier.json @@ -0,0 +1,224 @@ +{ + "files": [ + { + "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", + "groups": [ + { + "fullName": "benchmarks/splitting.bench.ts > Extract single page", + "benchmarks": [ + { + "id": "-1072438084_0_0", + "name": "extractPages (1 page from small PDF)", + "rank": 1, + "rme": 0.8797965308418093, + "samples": [], + "totalTime": 500.17104999999685, + "min": 0.3400409999999283, + "max": 0.8508749999998599, + "hz": 2517.1388867868463, + "period": 0.39727644956314284, + "mean": 0.39727644956314284, + "variance": 0.004003720204126598, + "sd": 0.0632749571641625, + "sem": 0.0017832777658714494, + "df": 1258, + "critical": 1.96, + "moe": 0.0034952244211080407, + "p75": 0.3927090000001954, + "p99": 0.74350000000004, + "p995": 0.7761249999998654, + "p999": 0.8387500000000045, + 
"sampleCount": 1259, + "median": 0.38120900000012625 + }, + { + "id": "-1072438084_0_1", + "name": "extractPages (1 page from 100-page PDF)", + "rank": 2, + "rme": 0.9765406302412275, + "samples": [], + "totalTime": 500.3271310000009, + "min": 1.584292000000005, + "max": 2.372624999999971, + "hz": 575.6233914886368, + "period": 1.737246982638892, + "mean": 1.737246982638892, + "variance": 0.021576654723250537, + "sd": 0.14688994085113705, + "sem": 0.008655572771994156, + "df": 287, + "critical": 1.96, + "moe": 0.016964922633108545, + "p75": 1.756249999999909, + "p99": 2.2702919999999267, + "p995": 2.3456670000000486, + "p999": 2.372624999999971, + "sampleCount": 288, + "median": 1.6839165000000094 + }, + { + "id": "-1072438084_0_2", + "name": "extractPages (1 page from 2000-page PDF)", + "rank": 3, + "rme": 2.1980090898940152, + "samples": [], + "totalTime": 533.5146669999999, + "min": 38.59916599999997, + "max": 43.44508300000007, + "hz": 24.366715301568274, + "period": 41.039589769230766, + "mean": 41.039589769230766, + "variance": 2.227890706742653, + "sd": 1.4926120416044664, + "sem": 0.4139760961830687, + "df": 12, + "critical": 2.179, + "moe": 0.9020539135829065, + "p75": 42.321292000000085, + "p99": 43.44508300000007, + "p995": 43.44508300000007, + "p999": 43.44508300000007, + "sampleCount": 13, + "median": 40.792207999999846 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", + "benchmarks": [ + { + "id": "-1072438084_1_0", + "name": "split 100-page PDF (0.1MB)", + "rank": 1, + "rme": 1.9707618954593826, + "samples": [], + "totalTime": 519.5589599999998, + "min": 25.109875000000102, + "max": 29.522958000000017, + "hz": 36.569478081948596, + "period": 27.34520842105262, + "mean": 27.34520842105262, + "variance": 1.2500641058887851, + "sd": 1.11806265740735, + "sem": 0.25650116506237763, + "df": 18, + "critical": 2.101, + "moe": 0.5389089477960554, + "p75": 28.18224999999984, + "p99": 29.522958000000017, + "p995": 
29.522958000000017, + "p999": 29.522958000000017, + "sampleCount": 19, + "median": 27.39729099999977 + }, + { + "id": "-1072438084_1_1", + "name": "split 2000-page PDF (0.9MB)", + "rank": 2, + "rme": 0, + "samples": [], + "totalTime": 506.605583, + "min": 506.605583, + "max": 506.605583, + "hz": 1.9739221863253724, + "period": 506.605583, + "mean": 506.605583, + "variance": 0, + "sd": 0, + "sem": 0, + "df": 0, + "critical": 12.71, + "moe": 0, + "p75": 506.605583, + "p99": 506.605583, + "p995": 506.605583, + "p999": 506.605583, + "sampleCount": 1, + "median": 506.605583 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", + "benchmarks": [ + { + "id": "-1072438084_2_0", + "name": "extract first 10 pages from 2000-page PDF", + "rank": 1, + "rme": 2.7205867827906096, + "samples": [], + "totalTime": 518.4931240000024, + "min": 40.28308300000026, + "max": 45.694250000000466, + "hz": 23.143990623104088, + "period": 43.20776033333353, + "mean": 43.20776033333353, + "variance": 3.422865540761208, + "sd": 1.850098792162518, + "sem": 0.5340775178412157, + "df": 11, + "critical": 2.201, + "moe": 1.175504616768516, + "p75": 44.60529199999928, + "p99": 45.694250000000466, + "p995": 45.694250000000466, + "p999": 45.694250000000466, + "sampleCount": 12, + "median": 43.13712450000003 + }, + { + "id": "-1072438084_2_1", + "name": "extract first 100 pages from 2000-page PDF", + "rank": 2, + "rme": 6.90077055615189, + "samples": [], + "totalTime": 538.6501679999983, + "min": 48.26045899999917, + "max": 65.11799999999948, + "hz": 18.56492505540262, + "period": 53.86501679999983, + "mean": 53.86501679999983, + "variance": 27.003718600062566, + "sd": 5.196510232844978, + "sem": 1.6432808220162056, + "df": 9, + "critical": 2.262, + "moe": 3.717101219400657, + "p75": 54.15062500000022, + "p99": 65.11799999999948, + "p995": 65.11799999999948, + "p999": 65.11799999999948, + "sampleCount": 10, + "median": 53.544791499999974 + }, + { + "id": 
"-1072438084_2_2", + "name": "extract every 10th page from 2000-page PDF (200 pages)", + "rank": 3, + "rme": 3.996294175601932, + "samples": [], + "totalTime": 509.82821200000035, + "min": 52.99541700000009, + "max": 62.211042000000816, + "hz": 17.65300504790424, + "period": 56.64757911111115, + "mean": 56.64757911111115, + "variance": 8.673642398208653, + "sd": 2.9451048195622263, + "sem": 0.9817016065207421, + "df": 8, + "critical": 2.306, + "moe": 2.2638039046368315, + "p75": 58.50329199999942, + "p99": 62.211042000000816, + "p995": 62.211042000000816, + "p999": 62.211042000000816, + "sampleCount": 9, + "median": 56.19641700000011 + } + ] + } + ] + } + ] +} diff --git a/src/api/pdf.ts b/src/api/pdf.ts index 1ac4296..d34fc69 100644 --- a/src/api/pdf.ts +++ b/src/api/pdf.ts @@ -1698,6 +1698,7 @@ export class PDF { * const [duplicate] = await pdf.copyPagesFrom(pdf, [0], { insertAt: 1 }); * ``` */ + // oxlint-disable-next-line typescript/require-await -- Public async API kept for backward compat; ObjectCopier is sync. async copyPagesFrom( source: PDF, indices: number[], @@ -1729,7 +1730,7 @@ export class PDF { throw new Error(`Source page ${index} not found`); } - const copiedPageRef = await copier.copyPage(srcPage.ref); + const copiedPageRef = copier.copyPage(srcPage.ref); copiedRefs.push(copiedPageRef); } @@ -1824,6 +1825,7 @@ export class PDF { * } * ``` */ + // oxlint-disable-next-line typescript/require-await -- Public async API kept for backward compat; ObjectCopier is sync. async embedPage(source: PDF, pageIndex: number): Promise { const srcPage = source.getPage(pageIndex); @@ -1842,7 +1844,7 @@ export class PDF { let resources: PdfDict; if (srcResources) { - const copied = await copier.copyObject(srcResources); + const copied = copier.copyObject(srcResources); // This is guaranteed by our checks above resources = copied instanceof PdfDict ? 
copied : new PdfDict(); diff --git a/src/document/object-copier.test.ts b/src/document/object-copier.test.ts index fae721f..b62deab 100644 --- a/src/document/object-copier.test.ts +++ b/src/document/object-copier.test.ts @@ -23,7 +23,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); // The copied page should be registered in dest expect(copiedPageRef).toBeInstanceOf(PdfRef); @@ -47,7 +47,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; @@ -73,7 +73,7 @@ describe("ObjectCopier", () => { expect(srcPage.has("Parent")).toBe(true); const copier = new ObjectCopier(source, dest); - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; expect(copiedPage.has("Parent")).toBe(false); @@ -92,7 +92,7 @@ describe("ObjectCopier", () => { expect(srcPage.has("Annots")).toBe(true); const copier = new ObjectCopier(source, dest, { includeAnnotations: false }); - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; expect(copiedPage.has("Annots")).toBe(false); @@ -111,7 +111,7 @@ describe("ObjectCopier", () => { expect(srcPage.has("Annots")).toBe(true); const copier = new ObjectCopier(source, dest); - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; expect(copiedPage.has("Annots")).toBe(true); @@ 
-138,7 +138,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const fakeRef = PdfRef.of(99999, 0); - await expect(copier.copyPage(fakeRef)).rejects.toThrow(/not found/); + expect(() => copier.copyPage(fakeRef)).toThrow(/not found/); }); it("copies page Resources", async () => { @@ -150,7 +150,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; @@ -181,9 +181,9 @@ describe("ObjectCopier", () => { const str = PdfString.fromString("hello"); // Primitives should be returned as-is (they're immutable) - expect(await copier.copyObject(name)).toBe(name); - expect(await copier.copyObject(num)).toBe(num); - expect(await copier.copyObject(str)).toBe(str); + expect(copier.copyObject(name)).toBe(name); + expect(copier.copyObject(num)).toBe(num); + expect(copier.copyObject(str)).toBe(str); }); it("creates new instance for arrays", async () => { @@ -195,7 +195,7 @@ describe("ObjectCopier", () => { const arr = new PdfArray([PdfNumber.of(1), PdfNumber.of(2), PdfName.of("Test")]); - const copied = await copier.copyObject(arr); + const copied = copier.copyObject(arr); expect(copied).toBeInstanceOf(PdfArray); expect(copied).not.toBe(arr); expect(copied.length).toBe(3); @@ -218,7 +218,7 @@ describe("ObjectCopier", () => { Key2: PdfString.fromString("value"), }); - const copied = await copier.copyObject(dict); + const copied = copier.copyObject(dict); expect(copied).toBeInstanceOf(PdfDict); expect(copied).not.toBe(dict); @@ -241,7 +241,7 @@ describe("ObjectCopier", () => { Array: innerArr, }); - const copied = await copier.copyObject(outerDict); + const copied = copier.copyObject(outerDict); // Outer should be new expect(copied).not.toBe(outerDict); @@ -269,7 +269,7 @@ describe("ObjectCopier", () => { const 
copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); // Refs should be different expect(copiedPageRef.objectNumber).not.toBe(srcPageRef.objectNumber); @@ -290,8 +290,8 @@ describe("ObjectCopier", () => { // Copy same page twice - internal resources should be deduplicated const srcPageRef = source.getPage(0)!.ref; - const copied1 = await copier.copyPage(srcPageRef); - const copied2 = await copier.copyPage(srcPageRef); + const copied1 = copier.copyPage(srcPageRef); + const copied2 = copier.copyPage(srcPageRef); // Page refs are different (each copyPage registers a new page) expect(copied1.objectNumber).not.toBe(copied2.objectNumber); @@ -321,7 +321,7 @@ describe("ObjectCopier", () => { const srcPageRef = source.getPage(0)!.ref; // This should not throw due to circular references - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); expect(copiedPageRef).toBeInstanceOf(PdfRef); // Page should be valid @@ -342,7 +342,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; @@ -365,7 +365,7 @@ describe("ObjectCopier", () => { const srcPageRef = source.getPage(0)!.ref; // Should not throw - streams are decoded and re-encoded - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; expect(copiedPage.getName("Type")?.value).toBe("Page"); @@ -381,7 +381,7 @@ describe("ObjectCopier", () => { const originalPageRef = pdf.getPage(0)!.ref; const copier = new ObjectCopier(pdf, pdf); - const duplicatedRef = await 
copier.copyPage(originalPageRef); + const duplicatedRef = copier.copyPage(originalPageRef); // Refs should be different expect(duplicatedRef.objectNumber).not.toBe(originalPageRef.objectNumber); @@ -411,7 +411,7 @@ describe("ObjectCopier", () => { // Copy a page const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); dest.insertPage(destOriginalCount, copiedPageRef); // Save @@ -444,7 +444,7 @@ describe("ObjectCopier", () => { for (let i = 0; i < source.getPageCount(); i++) { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(i)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); dest.insertPage(dest.getPageCount(), copiedPageRef); } @@ -468,7 +468,7 @@ describe("ObjectCopier", () => { for (let i = 0; i < originalCount; i++) { const copier = new ObjectCopier(pdf, pdf); const srcPageRef = pdf.getPage(i)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); // Insert duplicate after the original pdf.insertPage(i * 2 + 1, copiedPageRef); } @@ -493,7 +493,7 @@ describe("ObjectCopier", () => { // Copy form page with annotations const copier = new ObjectCopier(source, dest, { includeAnnotations: true }); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); dest.insertPage(dest.getPageCount(), copiedPageRef); const savedBytes = await dest.save(); @@ -516,7 +516,7 @@ describe("ObjectCopier", () => { // Copy from encrypted source const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); dest.insertPage(dest.getPageCount(), 
copiedPageRef); const savedBytes = await dest.save(); diff --git a/src/document/object-copier.ts b/src/document/object-copier.ts index 78c8c15..7e5d719 100644 --- a/src/document/object-copier.ts +++ b/src/document/object-copier.ts @@ -10,6 +10,8 @@ * - Flattens inherited page attributes during page copy * - Smart stream handling: raw bytes if unencrypted, re-encode if encrypted * - Circular reference detection + * + * All operations are synchronous — there is no I/O involved in object copying. */ import type { PDF } from "#src/api/pdf"; @@ -46,7 +48,7 @@ const INHERITABLE_PAGE_ATTRS = ["Resources", "MediaBox", "CropBox", "Rotate"] as * @example * ```typescript * const copier = new ObjectCopier(sourcePdf, destPdf); - * const copiedPageRef = await copier.copyPage(sourcePageRef); + * const copiedPageRef = copier.copyPage(sourcePageRef); * destPdf.insertPage(0, copiedPageRef); * ``` */ @@ -79,7 +81,7 @@ export class ObjectCopier { * @param srcPageRef Reference to the page in source document * @returns Reference to the copied page in destination document */ - async copyPage(srcPageRef: PdfRef): Promise { + copyPage(srcPageRef: PdfRef): PdfRef { const srcPage = this.source.getObject(srcPageRef); if (!(srcPage instanceof PdfDict)) { @@ -98,7 +100,7 @@ export class ObjectCopier { if (inherited) { // Deep copy the inherited value - const copied = await this.copyObject(inherited); + const copied = this.copyObject(inherited); cloned.set(key, copied); } } @@ -125,7 +127,7 @@ export class ObjectCopier { cloned.delete("Parent"); // Deep copy all values in the cloned dict, remapping refs - const copiedPage = await this.copyDictValues(cloned); + const copiedPage = this.copyDictValues(cloned); // Register in destination and return ref return this.dest.register(copiedPage); @@ -134,25 +136,25 @@ export class ObjectCopier { /** * Deep copy any PDF object, remapping references to destination. 
*/ - async copyObject(obj: T): Promise { + copyObject(obj: T): T { if (obj instanceof PdfRef) { // oxlint-disable-next-line typescript/no-unsafe-type-assertion - return (await this.copyRef(obj)) as unknown as T; + return this.copyRef(obj) as unknown as T; } if (obj instanceof PdfStream) { // oxlint-disable-next-line typescript/no-unsafe-type-assertion - return (await this.copyStream(obj)) as unknown as T; + return this.copyStream(obj) as unknown as T; } if (obj instanceof PdfDict) { // oxlint-disable-next-line typescript/no-unsafe-type-assertion - return (await this.copyDict(obj)) as unknown as T; + return this.copyDict(obj) as unknown as T; } if (obj instanceof PdfArray) { // oxlint-disable-next-line typescript/no-unsafe-type-assertion - return (await this.copyArray(obj)) as unknown as T; + return this.copyArray(obj) as unknown as T; } // Primitives (PdfName, PdfNumber, PdfString, PdfBool, PdfNull) @@ -166,7 +168,7 @@ export class ObjectCopier { * Handles circular references by registering a placeholder before * recursively copying the referenced object's contents. */ - private async copyRef(ref: PdfRef): Promise { + private copyRef(ref: PdfRef): PdfRef { const key = `${ref.objectNumber}:${ref.generation}`; // Already copied (or being copied)? @@ -206,7 +208,7 @@ export class ObjectCopier { const items: PdfObject[] = []; for (const item of srcObj) { - items.push(await this.copyObject(item)); + items.push(this.copyObject(item)); } const copiedArr = new PdfArray(items); @@ -226,7 +228,7 @@ export class ObjectCopier { /** * Copy a dict reference, handling circular references. 
*/ - private async copyDictRef(key: string, srcDict: PdfDict): Promise { + private copyDictRef(key: string, srcDict: PdfDict): PdfRef { // Clone the dict shell first const cloned = srcDict.clone(); @@ -235,7 +237,7 @@ export class ObjectCopier { this.refMap.set(key, destRef); // Now copy all values (which may reference back to us) - await this.copyDictValues(cloned); + this.copyDictValues(cloned); return destRef; } @@ -243,7 +245,7 @@ export class ObjectCopier { /** * Copy a stream reference, handling circular references and encryption. */ - private async copyStreamRef(key: string, srcStream: PdfStream): Promise { + private copyStreamRef(key: string, srcStream: PdfStream): PdfRef { const sourceWasEncrypted = this.source.isEncrypted; // Clone the stream's dictionary @@ -310,7 +312,7 @@ export class ObjectCopier { // Now copy dict values (which may reference back to us) // Note: we modify the already-registered stream's dict entries for (const [entryKey, value] of clonedDict) { - const copied = await this.copyObject(value); + const copied = this.copyObject(value); copiedStream.set(entryKey.value, copied); } @@ -321,7 +323,7 @@ export class ObjectCopier { /** * Copy a dictionary, remapping all reference values. */ - private async copyDict(dict: PdfDict): Promise { + private copyDict(dict: PdfDict): PdfDict { const cloned = dict.clone(); return this.copyDictValues(cloned); @@ -331,9 +333,9 @@ export class ObjectCopier { * Copy all values in a dictionary, remapping references. * Modifies the dict in place and returns it. */ - private async copyDictValues(dict: PdfDict): Promise { + private copyDictValues(dict: PdfDict): PdfDict { for (const [key, value] of dict) { - const copied = await this.copyObject(value); + const copied = this.copyObject(value); dict.set(key.value, copied); } @@ -344,11 +346,11 @@ export class ObjectCopier { /** * Copy an array, remapping all reference elements. 
*/ - private async copyArray(arr: PdfArray): Promise { + private copyArray(arr: PdfArray): PdfArray { const items: PdfObject[] = []; for (const item of arr) { - items.push(await this.copyObject(item)); + items.push(this.copyObject(item)); } return new PdfArray(items); @@ -360,14 +362,14 @@ export class ObjectCopier { * If source wasn't encrypted, copies raw encoded bytes (fastest). * If source was encrypted, decodes and re-encodes with same filters. */ - private async copyStream(stream: PdfStream): Promise { + private copyStream(stream: PdfStream): PdfStream { const sourceWasEncrypted = this.source.isEncrypted; // Clone the stream's dictionary const clonedDict = stream.clone(); // Copy dict values (remapping refs, but not stream data yet) - await this.copyDictValues(clonedDict); + this.copyDictValues(clonedDict); if (!sourceWasEncrypted) { // Source wasn't encrypted - copy raw encoded bytes directly From a17416a9d455a138a90bc3f31f277bc06c858d0f Mon Sep 17 00:00:00 2001 From: Lucas Smith Date: Wed, 18 Feb 2026 10:56:04 +1100 Subject: [PATCH 3/9] perf(cache): replace internal LRU cache with npm lru-cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The internal LRU cache did Map.delete()+Map.set() on every get() to maintain recency ordering. The npm lru-cache package uses a doubly-linked-list for O(1) operations without Map rehashing. 
Benchmarks show significant gains especially on large PDF parsing: - 2000-page split: 506.6ms → 432.3ms (1.17x incremental) - Single page from 2000p: 41.0ms → 25.5ms (1.61x incremental) - Cumulative from baseline: 1.35x–1.60x across split workloads --- bun.lock | 5 +- package.json | 1 + reports/baselines/comparison-npm-lru-cache.md | 39 +++ reports/baselines/npm-lru-cache.json | 224 ++++++++++++++++++ src/helpers/lru-cache.test.ts | 18 +- src/helpers/lru-cache.ts | 91 +------ src/objects/pdf-name.ts | 2 +- src/objects/pdf-ref.ts | 2 +- 8 files changed, 283 insertions(+), 99 deletions(-) create mode 100644 reports/baselines/comparison-npm-lru-cache.md create mode 100644 reports/baselines/npm-lru-cache.json diff --git a/bun.lock b/bun.lock index 2d34aa2..23cceae 100644 --- a/bun.lock +++ b/bun.lock @@ -8,6 +8,7 @@ "@noble/ciphers": "^2.1.1", "@noble/hashes": "^2.0.1", "@scure/base": "^2.0.0", + "lru-cache": "^11.2.6", "pako": "^2.1.0", "pkijs": "^3.3.3", }, @@ -492,7 +493,7 @@ "long": ["long@5.3.2", "", {}, "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA=="], - "lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], + "lru-cache": ["lru-cache@11.2.6", "", {}, "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ=="], "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="], @@ -706,6 +707,8 @@ "micromatch/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="], + "path-scurry/lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], + "pdf-lib/pako": ["pako@1.0.11", "", {}, 
"sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="], "pkijs/@noble/hashes": ["@noble/hashes@1.4.0", "", {}, "sha512-V1JJ1WTRUqHHrOSh597hURcMqVKVGL/ea3kv0gSnEdsEZ0/+VyPghM1lMNGc00z7CIQorSvbKpuJkxvuHbvdbg=="], diff --git a/package.json b/package.json index 230f00b..0005466 100644 --- a/package.json +++ b/package.json @@ -67,6 +67,7 @@ "@noble/ciphers": "^2.1.1", "@noble/hashes": "^2.0.1", "@scure/base": "^2.0.0", + "lru-cache": "^11.2.6", "pako": "^2.1.0", "pkijs": "^3.3.3" }, diff --git a/reports/baselines/comparison-npm-lru-cache.md b/reports/baselines/comparison-npm-lru-cache.md new file mode 100644 index 0000000..6bba72c --- /dev/null +++ b/reports/baselines/comparison-npm-lru-cache.md @@ -0,0 +1,39 @@ +# Benchmark Comparison: npm lru-cache + +## Change + +Replaced internal LRU cache (Map delete+set on every get) with the npm `lru-cache` +package (v11, doubly-linked-list, O(1) get/set without Map rehashing). + +## Results (vs original baseline) + +| Benchmark | Baseline (ms) | Sync Copier (ms) | + npm lru-cache (ms) | Total Speedup | +| :----------------------------------------------------- | ------------: | ---------------: | -------------------: | ------------: | +| extractPages (1 page from small PDF) | 0.45 | 0.40 | 0.38 | 1.17x | +| extractPages (1 page from 100-page PDF) | 1.78 | 1.74 | 1.66 | 1.07x | +| extractPages (1 page from 2000-page PDF) | 40.86 | 41.04 | 25.50 | 1.60x | +| split 100-page PDF (0.1MB) | 31.64 | 27.35 | 24.59 | 1.29x | +| split 2000-page PDF (0.9MB) | 582.50 | 506.61 | 432.27 | 1.35x | +| extract first 10 pages from 2000-page PDF | 42.87 | 43.21 | 27.31 | 1.57x | +| extract first 100 pages from 2000-page PDF | 50.92 | 53.87 | 35.21 | 1.45x | +| extract every 10th page from 2000-page PDF (200 pages) | 58.78 | 56.65 | 40.52 | 1.45x | + +## Incremental improvement (sync copier -> + npm lru-cache) + +| Benchmark | Sync Copier (ms) | + npm lru-cache (ms) | Incremental Speedup | +| 
:----------------------------------------------------- | ---------------: | -------------------: | ------------------: | +| extractPages (1 page from small PDF) | 0.40 | 0.38 | 1.03x | +| extractPages (1 page from 100-page PDF) | 1.74 | 1.66 | 1.05x | +| extractPages (1 page from 2000-page PDF) | 41.04 | 25.50 | 1.61x | +| split 100-page PDF (0.1MB) | 27.35 | 24.59 | 1.11x | +| split 2000-page PDF (0.9MB) | 506.61 | 432.27 | 1.17x | +| extract first 10 pages from 2000-page PDF | 43.21 | 27.31 | 1.58x | +| extract first 100 pages from 2000-page PDF | 53.87 | 35.21 | 1.53x | +| extract every 10th page from 2000-page PDF (200 pages) | 56.65 | 40.52 | 1.40x | + +## Key Takeaways + +- **2000-page split**: 582.5ms -> 432.3ms (1.35x faster total) +- **100-page split**: 31.6ms -> 24.6ms (1.29x faster total) +- **Single page from 2000p**: 40.9ms -> 25.5ms (1.60x faster total) +- **npm lru-cache incremental gain on 2000-page split**: 506.6ms -> 432.3ms (1.17x) diff --git a/reports/baselines/npm-lru-cache.json b/reports/baselines/npm-lru-cache.json new file mode 100644 index 0000000..346c580 --- /dev/null +++ b/reports/baselines/npm-lru-cache.json @@ -0,0 +1,224 @@ +{ + "files": [ + { + "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", + "groups": [ + { + "fullName": "benchmarks/splitting.bench.ts > Extract single page", + "benchmarks": [ + { + "id": "-1072438084_0_0", + "name": "extractPages (1 page from small PDF)", + "rank": 1, + "rme": 0.6803617904005139, + "samples": [], + "totalTime": 500.2104360000012, + "min": 0.31749999999988177, + "max": 0.7807909999999083, + "hz": 2598.9061931526694, + "period": 0.3847772584615394, + "mean": 0.3847772584615394, + "variance": 0.002319155302547339, + "sd": 0.04815760897872048, + "sem": 0.0013356517575117053, + "df": 1299, + "critical": 1.96, + "moe": 0.0026178774447229423, + "p75": 0.38512500000001637, + "p99": 0.615833000000066, + "p995": 0.6623329999999896, + "p999": 0.7236249999998563, + "sampleCount": 1300, + 
"median": 0.37283300000012787 + }, + { + "id": "-1072438084_0_1", + "name": "extractPages (1 page from 100-page PDF)", + "rank": 2, + "rme": 0.787155411012552, + "samples": [], + "totalTime": 500.06587099999865, + "min": 1.5184580000000096, + "max": 2.1133749999999054, + "hz": 601.92070176291, + "period": 1.6613484086378694, + "mean": 1.6613484086378694, + "variance": 0.013399752069859515, + "sd": 0.11575729812784814, + "sem": 0.006672139742022405, + "df": 300, + "critical": 1.96, + "moe": 0.013077393894363914, + "p75": 1.7012500000000728, + "p99": 2.0060419999999795, + "p995": 2.059125000000222, + "p999": 2.1133749999999054, + "sampleCount": 301, + "median": 1.6227090000002136 + }, + { + "id": "-1072438084_0_2", + "name": "extractPages (1 page from 2000-page PDF)", + "rank": 3, + "rme": 1.172293974657617, + "samples": [], + "totalTime": 509.9992109999989, + "min": 24.20520899999974, + "max": 26.485957999999755, + "hz": 39.215746943577216, + "period": 25.499960549999948, + "mean": 25.499960549999948, + "variance": 0.40798446042404196, + "sd": 0.6387366127161038, + "sem": 0.14282584857511646, + "df": 19, + "critical": 2.093, + "moe": 0.29893450106771874, + "p75": 26.051500000000033, + "p99": 26.485957999999755, + "p995": 26.485957999999755, + "p999": 26.485957999999755, + "sampleCount": 20, + "median": 25.388771000000133 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", + "benchmarks": [ + { + "id": "-1072438084_1_0", + "name": "split 100-page PDF (0.1MB)", + "rank": 1, + "rme": 1.5707569902861633, + "samples": [], + "totalTime": 516.3171660000003, + "min": 23.525666999999885, + "max": 26.712125000000015, + "hz": 40.672674439028796, + "period": 24.586531714285726, + "mean": 24.586531714285726, + "variance": 0.7197857225650164, + "sd": 0.8484018638387214, + "sem": 0.18513646479916948, + "df": 20, + "critical": 2.086, + "moe": 0.3861946655710675, + "p75": 24.975667000000158, + "p99": 26.712125000000015, + "p995": 
26.712125000000015, + "p999": 26.712125000000015, + "sampleCount": 21, + "median": 24.294166000000132 + }, + { + "id": "-1072438084_1_1", + "name": "split 2000-page PDF (0.9MB)", + "rank": 2, + "rme": 0, + "samples": [], + "totalTime": 432.27162499999986, + "min": 432.27162499999986, + "max": 432.27162499999986, + "hz": 2.313360262774593, + "period": 432.27162499999986, + "mean": 432.27162499999986, + "variance": 0, + "sd": 0, + "sem": 0, + "df": 0, + "critical": 12.71, + "moe": 0, + "p75": 432.27162499999986, + "p99": 432.27162499999986, + "p995": 432.27162499999986, + "p999": 432.27162499999986, + "sampleCount": 1, + "median": 432.27162499999986 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", + "benchmarks": [ + { + "id": "-1072438084_2_0", + "name": "extract first 10 pages from 2000-page PDF", + "rank": 1, + "rme": 2.223598050281543, + "samples": [], + "totalTime": 518.8923730000015, + "min": 26.314790999999786, + "max": 30.981832999999824, + "hz": 36.61645649202854, + "period": 27.31012489473692, + "mean": 27.31012489473692, + "variance": 1.5873088504924244, + "sd": 1.2598844591836287, + "sem": 0.28903731779572794, + "df": 18, + "critical": 2.101, + "moe": 0.6072674046888245, + "p75": 27.280499999999847, + "p99": 30.981832999999824, + "p995": 30.981832999999824, + "p999": 30.981832999999824, + "sampleCount": 19, + "median": 26.98291700000027 + }, + { + "id": "-1072438084_2_1", + "name": "extract first 100 pages from 2000-page PDF", + "rank": 2, + "rme": 2.213921234147061, + "samples": [], + "totalTime": 528.2034549999998, + "min": 34.1123749999997, + "max": 39.86729100000048, + "hz": 28.398148209765125, + "period": 35.21356366666666, + "mean": 35.21356366666666, + "variance": 1.9814401301546256, + "sd": 1.4076363628986805, + "sem": 0.3634501460681713, + "df": 14, + "critical": 2.145, + "moe": 0.7796005633162274, + "p75": 35.0894159999998, + "p99": 39.86729100000048, + "p995": 39.86729100000048, + "p999": 
39.86729100000048, + "sampleCount": 15, + "median": 34.765957999999955 + }, + { + "id": "-1072438084_2_2", + "name": "extract every 10th page from 2000-page PDF (200 pages)", + "rank": 3, + "rme": 2.0448631675208837, + "samples": [], + "totalTime": 526.8241219999973, + "min": 38.62562499999967, + "max": 42.899374999999964, + "hz": 24.67616697323527, + "period": 40.52493246153825, + "mean": 40.52493246153825, + "variance": 1.8801911150990929, + "sd": 1.3712006108148773, + "sem": 0.38030262394158326, + "df": 12, + "critical": 2.179, + "moe": 0.8286794175687099, + "p75": 41.67979099999957, + "p99": 42.899374999999964, + "p995": 42.899374999999964, + "p999": 42.899374999999964, + "sampleCount": 13, + "median": 40.114832999999635 + } + ] + } + ] + } + ] +} diff --git a/src/helpers/lru-cache.test.ts b/src/helpers/lru-cache.test.ts index acb3fe8..7385bc4 100644 --- a/src/helpers/lru-cache.test.ts +++ b/src/helpers/lru-cache.test.ts @@ -4,7 +4,7 @@ import { LRUCache } from "./lru-cache"; describe("LRUCache", () => { it("stores and retrieves values", () => { - const cache = new LRUCache(10); + const cache = new LRUCache({ max: 10 }); cache.set("a", 1); cache.set("b", 2); @@ -15,7 +15,7 @@ describe("LRUCache", () => { }); it("updates existing values", () => { - const cache = new LRUCache(10); + const cache = new LRUCache({ max: 10 }); cache.set("a", 1); cache.set("a", 2); @@ -25,7 +25,7 @@ describe("LRUCache", () => { }); it("evicts least recently used when at capacity", () => { - const cache = new LRUCache(3); + const cache = new LRUCache({ max: 3 }); cache.set("a", 1); cache.set("b", 2); @@ -45,7 +45,7 @@ describe("LRUCache", () => { }); it("get updates recency", () => { - const cache = new LRUCache(3); + const cache = new LRUCache({ max: 3 }); cache.set("a", 1); cache.set("b", 2); @@ -64,7 +64,7 @@ describe("LRUCache", () => { }); it("has checks existence without updating recency", () => { - const cache = new LRUCache(3); + const cache = new LRUCache({ max: 3 }); 
cache.set("a", 1); cache.set("b", 2); @@ -81,7 +81,7 @@ describe("LRUCache", () => { }); it("delete removes entries", () => { - const cache = new LRUCache(10); + const cache = new LRUCache({ max: 10 }); cache.set("a", 1); cache.set("b", 2); @@ -93,7 +93,7 @@ describe("LRUCache", () => { }); it("clear removes all entries", () => { - const cache = new LRUCache(10); + const cache = new LRUCache({ max: 10 }); cache.set("a", 1); cache.set("b", 2); @@ -105,8 +105,8 @@ describe("LRUCache", () => { expect(cache.get("a")).toBeUndefined(); }); - it("works with default size", () => { - const cache = new LRUCache(); + it("works with large capacity", () => { + const cache = new LRUCache({ max: 10000 }); // Should be able to add many items for (let i = 0; i < 1000; i++) { diff --git a/src/helpers/lru-cache.ts b/src/helpers/lru-cache.ts index cb64792..77ef617 100644 --- a/src/helpers/lru-cache.ts +++ b/src/helpers/lru-cache.ts @@ -1,91 +1,8 @@ /** - * Simple LRU (Least Recently Used) cache implementation. + * LRU cache re-export. * - * Used for interning frequently-used PDF objects (PdfName, PdfRef) - * while preventing unbounded memory growth. + * Wraps the `lru-cache` npm package to provide the same interface used + * by PdfName and PdfRef interning caches. */ -/** - * A bounded cache that evicts least-recently-used entries when full. - * - * @typeParam K - Key type - * @typeParam V - Value type - */ -export class LRUCache { - private readonly maxSize: number; - private readonly cache = new Map(); - - /** - * Create a new LRU cache. - * - * @param maxSize - Maximum number of entries to retain (default: 10000) - */ - constructor(maxSize = 10000) { - this.maxSize = maxSize; - } - - /** - * Get a value from the cache, updating its recency. 
- * - * @returns The cached value, or undefined if not present - */ - get(key: K): V | undefined { - const value = this.cache.get(key); - - if (value !== undefined) { - // Move to end (most recently used) - this.cache.delete(key); - this.cache.set(key, value); - } - - return value; - } - - /** - * Check if a key exists in the cache (without updating recency). - */ - has(key: K): boolean { - return this.cache.has(key); - } - - /** - * Add or update a value in the cache. - * - * If the cache is at capacity, the least-recently-used entry is evicted. - */ - set(key: K, value: V): void { - // If key exists, delete it first so it becomes the most recent - if (this.cache.has(key)) { - this.cache.delete(key); - } else if (this.cache.size >= this.maxSize) { - // Evict the oldest entry (first in Map iteration order) - const oldestKey = this.cache.keys().next().value; - if (oldestKey !== undefined) { - this.cache.delete(oldestKey); - } - } - - this.cache.set(key, value); - } - - /** - * Remove a value from the cache. - */ - delete(key: K): boolean { - return this.cache.delete(key); - } - - /** - * Clear all entries from the cache. - */ - clear(): void { - this.cache.clear(); - } - - /** - * Get the current number of entries in the cache. - */ - get size(): number { - return this.cache.size; - } -} +export { LRUCache } from "lru-cache"; diff --git a/src/objects/pdf-name.ts b/src/objects/pdf-name.ts index a8238e9..7ba0dc8 100644 --- a/src/objects/pdf-name.ts +++ b/src/objects/pdf-name.ts @@ -57,7 +57,7 @@ export class PdfName implements PdfPrimitive { return "name"; } - private static cache = new LRUCache(DEFAULT_NAME_CACHE_SIZE); + private static cache = new LRUCache({ max: DEFAULT_NAME_CACHE_SIZE }); /** * Pre-cached common names that should never be evicted. 
diff --git a/src/objects/pdf-ref.ts b/src/objects/pdf-ref.ts index 3c4e474..77312c7 100644 --- a/src/objects/pdf-ref.ts +++ b/src/objects/pdf-ref.ts @@ -23,7 +23,7 @@ export class PdfRef implements PdfPrimitive { return "ref"; } - private static cache = new LRUCache(DEFAULT_REF_CACHE_SIZE); + private static cache = new LRUCache({ max: DEFAULT_REF_CACHE_SIZE }); private constructor( readonly objectNumber: number, From 05fd0e9ec106ca0accd7a17654b086eafdf30212 Mon Sep 17 00:00:00 2001 From: Lucas Smith Date: Wed, 18 Feb 2026 11:46:18 +1100 Subject: [PATCH 4/9] perf(writer): cache PdfName bytes, shared hex table, skip tiny deflate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three changes: - PdfName.toBytes() caches serialized bytes on the interned instance (compute once, writeBytes on every subsequent call). ASCII fast-path skips TextEncoder entirely for the 99% of names that are pure ASCII. - Shared HEX_TABLE in buffer.ts replaces per-byte toString(16) calls in both bytesToHex and escapeName. - Skip deflate for streams under 512 bytes (configurable via compressionThreshold). Deflate init zeros a 64KB hash table per call; for tiny streams the overhead dwarfs any savings. - Expose compressStreams and compressionThreshold on SaveOptions. Cumulative from baseline: 582ms → 245ms (2.38x) on 2000-page split. 
--- reports/baselines/escape-name-opt.json | 224 +++++++++++++++++++++++ reports/baselines/skip-tiny-deflate.json | 224 +++++++++++++++++++++++ src/api/pdf.ts | 24 +++ src/helpers/buffer.ts | 9 +- src/objects/pdf-name.ts | 53 +++++- src/writer/pdf-writer.ts | 35 +++- 6 files changed, 554 insertions(+), 15 deletions(-) create mode 100644 reports/baselines/escape-name-opt.json create mode 100644 reports/baselines/skip-tiny-deflate.json diff --git a/reports/baselines/escape-name-opt.json b/reports/baselines/escape-name-opt.json new file mode 100644 index 0000000..55cfe9e --- /dev/null +++ b/reports/baselines/escape-name-opt.json @@ -0,0 +1,224 @@ +{ + "files": [ + { + "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", + "groups": [ + { + "fullName": "benchmarks/splitting.bench.ts > Extract single page", + "benchmarks": [ + { + "id": "-1072438084_0_0", + "name": "extractPages (1 page from small PDF)", + "rank": 1, + "rme": 1.3598162943493441, + "samples": [], + "totalTime": 500.19283100000087, + "min": 0.3138329999999314, + "max": 2.999708000000055, + "hz": 2802.9190206446556, + "period": 0.35677092082739004, + "mean": 0.35677092082739004, + "variance": 0.008589645712391514, + "sd": 0.09268034156384791, + "sem": 0.002475218936179106, + "df": 1401, + "critical": 1.96, + "moe": 0.004851429114911048, + "p75": 0.3457499999999527, + "p99": 0.5737500000000182, + "p995": 0.6079170000000431, + "p999": 0.9181670000000395, + "sampleCount": 1402, + "median": 0.333729500000004 + }, + { + "id": "-1072438084_0_1", + "name": "extractPages (1 page from 100-page PDF)", + "rank": 2, + "rme": 0.5969408576151285, + "samples": [], + "totalTime": 500.4518780000001, + "min": 1.4726660000001175, + "max": 2.0149169999999685, + "hz": 633.4275360637171, + "period": 1.5787125488958993, + "mean": 1.5787125488958993, + "variance": 0.007328512823299263, + "sd": 0.0856067335161158, + "sem": 0.0048081531778861415, + "df": 316, + "critical": 1.96, + "moe": 0.009423980228656837, + 
"p75": 1.589292000000114, + "p99": 1.8592499999999745, + "p995": 1.8713330000000497, + "p999": 2.0149169999999685, + "sampleCount": 317, + "median": 1.55074999999988 + }, + { + "id": "-1072438084_0_2", + "name": "extractPages (1 page from 2000-page PDF)", + "rank": 3, + "rme": 0.8810702753157642, + "samples": [], + "totalTime": 503.6995829999996, + "min": 24.38437500000009, + "max": 26.65845900000022, + "hz": 39.70620718183127, + "period": 25.184979149999982, + "mean": 25.184979149999982, + "variance": 0.2247997529768304, + "sd": 0.47413052314402876, + "sem": 0.10601880799575855, + "df": 19, + "critical": 2.093, + "moe": 0.22189736513512265, + "p75": 25.31841600000007, + "p99": 26.65845900000022, + "p995": 26.65845900000022, + "p999": 26.65845900000022, + "sampleCount": 20, + "median": 25.21672899999976 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", + "benchmarks": [ + { + "id": "-1072438084_1_0", + "name": "split 100-page PDF (0.1MB)", + "rank": 1, + "rme": 1.7048381748393946, + "samples": [], + "totalTime": 502.2399970000006, + "min": 19.57883300000003, + "max": 23.024417000000085, + "hz": 47.78591936794706, + "period": 20.92666654166669, + "mean": 20.92666654166669, + "variance": 0.7136030401800851, + "sd": 0.8447502827345399, + "sem": 0.17243392939762045, + "df": 23, + "critical": 2.069, + "moe": 0.3567657999236767, + "p75": 21.305459000000155, + "p99": 23.024417000000085, + "p995": 23.024417000000085, + "p999": 23.024417000000085, + "sampleCount": 24, + "median": 20.864958000000115 + }, + { + "id": "-1072438084_1_1", + "name": "split 2000-page PDF (0.9MB)", + "rank": 2, + "rme": 0, + "samples": [], + "totalTime": 369.2785839999997, + "min": 369.2785839999997, + "max": 369.2785839999997, + "hz": 2.7079826540929353, + "period": 369.2785839999997, + "mean": 369.2785839999997, + "variance": 0, + "sd": 0, + "sem": 0, + "df": 0, + "critical": 12.71, + "moe": 0, + "p75": 369.2785839999997, + "p99": 369.2785839999997, + 
"p995": 369.2785839999997, + "p999": 369.2785839999997, + "sampleCount": 1, + "median": 369.2785839999997 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", + "benchmarks": [ + { + "id": "-1072438084_2_0", + "name": "extract first 10 pages from 2000-page PDF", + "rank": 1, + "rme": 2.2077471741058514, + "samples": [], + "totalTime": 511.5155429999995, + "min": 25.185083999999733, + "max": 31.580417000000125, + "hz": 37.14452133471146, + "period": 26.9218706842105, + "mean": 26.9218706842105, + "variance": 1.5205847790653222, + "sd": 1.2331199370155859, + "sem": 0.2828971153008515, + "df": 18, + "critical": 2.101, + "moe": 0.594366839247089, + "p75": 27.052708000000166, + "p99": 31.580417000000125, + "p995": 31.580417000000125, + "p999": 31.580417000000125, + "sampleCount": 19, + "median": 26.722999999999956 + }, + { + "id": "-1072438084_2_1", + "name": "extract first 100 pages from 2000-page PDF", + "rank": 2, + "rme": 1.8050175681822698, + "samples": [], + "totalTime": 517.9517489999989, + "min": 33.36449999999968, + "max": 37.865499999999884, + "hz": 28.960226563497965, + "period": 34.53011659999993, + "mean": 34.53011659999993, + "variance": 1.2664720864594134, + "sd": 1.1253764198966556, + "sem": 0.2905709421649515, + "df": 14, + "critical": 2.145, + "moe": 0.6232746709438209, + "p75": 35.07374999999956, + "p99": 37.865499999999884, + "p995": 37.865499999999884, + "p999": 37.865499999999884, + "sampleCount": 15, + "median": 34.1270829999994 + }, + { + "id": "-1072438084_2_2", + "name": "extract every 10th page from 2000-page PDF (200 pages)", + "rank": 3, + "rme": 3.7908921701889815, + "samples": [], + "totalTime": 528.9624159999994, + "min": 37.919458000000304, + "max": 48.4839579999998, + "hz": 24.576415274086347, + "period": 40.689416615384566, + "mean": 40.689416615384566, + "variance": 6.5144038865667575, + "sd": 2.552333028146358, + "sem": 0.707889815772459, + "df": 12, + "critical": 2.179, + "moe": 1.542491908568188, 
+ "p75": 40.70625000000018, + "p99": 48.4839579999998, + "p995": 48.4839579999998, + "p999": 48.4839579999998, + "sampleCount": 13, + "median": 40.056916999999885 + } + ] + } + ] + } + ] +} diff --git a/reports/baselines/skip-tiny-deflate.json b/reports/baselines/skip-tiny-deflate.json new file mode 100644 index 0000000..d11d64e --- /dev/null +++ b/reports/baselines/skip-tiny-deflate.json @@ -0,0 +1,224 @@ +{ + "files": [ + { + "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", + "groups": [ + { + "fullName": "benchmarks/splitting.bench.ts > Extract single page", + "benchmarks": [ + { + "id": "-1072438084_0_0", + "name": "extractPages (1 page from small PDF)", + "rank": 1, + "rme": 0.8892829061609049, + "samples": [], + "totalTime": 500.31841499999973, + "min": 0.2905420000001868, + "max": 1.9279579999999896, + "hz": 2924.1378213112557, + "period": 0.3419811449077237, + "mean": 0.3419811449077237, + "variance": 0.0035222193288500063, + "sd": 0.05934828833968176, + "sem": 0.001551622379570276, + "df": 1462, + "critical": 1.96, + "moe": 0.0030411798639577406, + "p75": 0.3381249999999909, + "p99": 0.5579999999999927, + "p995": 0.5912919999998394, + "p999": 0.6483749999999873, + "sampleCount": 1463, + "median": 0.3292500000002292 + }, + { + "id": "-1072438084_0_1", + "name": "extractPages (1 page from 100-page PDF)", + "rank": 2, + "rme": 0.6942473447539961, + "samples": [], + "totalTime": 500.1834050000034, + "min": 1.3748750000004293, + "max": 1.868916999999783, + "hz": 669.7543274151564, + "period": 1.4930847910447862, + "mean": 1.4930847910447862, + "variance": 0.009369794423687357, + "sd": 0.09679769844209808, + "sem": 0.005288623222833765, + "df": 334, + "critical": 1.96, + "moe": 0.010365701516754179, + "p75": 1.5309159999997064, + "p99": 1.8027499999998327, + "p995": 1.8117079999997259, + "p999": 1.868916999999783, + "sampleCount": 335, + "median": 1.4698750000002292 + }, + { + "id": "-1072438084_0_2", + "name": "extractPages (1 page from 
2000-page PDF)", + "rank": 3, + "rme": 1.1888586380169315, + "samples": [], + "totalTime": 501.3285840000003, + "min": 24.17533400000002, + "max": 27.005499999999756, + "hz": 39.89399495321812, + "period": 25.066429200000016, + "mean": 25.066429200000016, + "variance": 0.4054496384268641, + "sd": 0.6367492743826758, + "sem": 0.14238146621433284, + "df": 19, + "critical": 2.093, + "moe": 0.2980044087865986, + "p75": 25.286291999999776, + "p99": 27.005499999999756, + "p995": 27.005499999999756, + "p999": 27.005499999999756, + "sampleCount": 20, + "median": 24.951479000000063 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", + "benchmarks": [ + { + "id": "-1072438084_1_0", + "name": "split 100-page PDF (0.1MB)", + "rank": 1, + "rme": 1.241299549103146, + "samples": [], + "totalTime": 509.5840829999979, + "min": 12.014166999999816, + "max": 13.951500000000124, + "hz": 78.49538738438218, + "period": 12.739602074999947, + "mean": 12.739602074999947, + "variance": 0.2443938158093934, + "sd": 0.4943620290934503, + "sem": 0.07816550003188641, + "df": 39, + "critical": 2.0231, + "moe": 0.15813662311450938, + "p75": 12.96362500000032, + "p99": 13.951500000000124, + "p995": 13.951500000000124, + "p999": 13.951500000000124, + "sampleCount": 40, + "median": 12.703645499999766 + }, + { + "id": "-1072438084_1_1", + "name": "split 2000-page PDF (0.9MB)", + "rank": 2, + "rme": 0, + "samples": [], + "totalTime": 244.68245800000022, + "min": 244.68245800000022, + "max": 244.68245800000022, + "hz": 4.086929680917294, + "period": 244.68245800000022, + "mean": 244.68245800000022, + "variance": 0, + "sd": 0, + "sem": 0, + "df": 0, + "critical": 12.71, + "moe": 0, + "p75": 244.68245800000022, + "p99": 244.68245800000022, + "p995": 244.68245800000022, + "p999": 244.68245800000022, + "sampleCount": 1, + "median": 244.68245800000022 + } + ] + }, + { + "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", + "benchmarks": [ + { + "id": 
"-1072438084_2_0", + "name": "extract first 10 pages from 2000-page PDF", + "rank": 1, + "rme": 0.9557725912034225, + "samples": [], + "totalTime": 504.57903800000076, + "min": 24.360125000000153, + "max": 25.940999999999804, + "hz": 39.637001329413074, + "period": 25.228951900000038, + "mean": 25.228951900000038, + "variance": 0.2654600064528459, + "sd": 0.5152281110856102, + "sem": 0.11520850803062373, + "df": 19, + "critical": 2.093, + "moe": 0.24113140730809546, + "p75": 25.62262499999997, + "p99": 25.940999999999804, + "p995": 25.940999999999804, + "p999": 25.940999999999804, + "sampleCount": 20, + "median": 25.240332999999737 + }, + { + "id": "-1072438084_2_1", + "name": "extract first 100 pages from 2000-page PDF", + "rank": 2, + "rme": 0.8129752528372926, + "samples": [], + "totalTime": 509.13470899999993, + "min": 25.983500000000276, + "max": 28.018167000000176, + "hz": 37.31821787856149, + "period": 26.796563631578945, + "mean": 26.796563631578945, + "variance": 0.20427459388908475, + "sd": 0.45196746994566406, + "sem": 0.10368844880320557, + "df": 18, + "critical": 2.101, + "moe": 0.2178494309355349, + "p75": 27.132666999999856, + "p99": 28.018167000000176, + "p995": 28.018167000000176, + "p999": 28.018167000000176, + "sampleCount": 19, + "median": 26.71374999999989 + }, + { + "id": "-1072438084_2_2", + "name": "extract every 10th page from 2000-page PDF (200 pages)", + "rank": 3, + "rme": 1.7260235531528858, + "samples": [], + "totalTime": 526.418208000001, + "min": 28.0747080000001, + "max": 31.5014170000004, + "hz": 34.19334613896935, + "period": 29.245456000000054, + "mean": 29.245456000000054, + "variance": 1.0301911737761424, + "sd": 1.014983336698757, + "sem": 0.23923386672368, + "df": 17, + "critical": 2.11, + "moe": 0.5047834587869647, + "p75": 29.7938750000003, + "p99": 31.5014170000004, + "p995": 31.5014170000004, + "p999": 31.5014170000004, + "sampleCount": 18, + "median": 28.999896000000717 + } + ] + } + ] + } + ] +} diff --git 
a/src/api/pdf.ts b/src/api/pdf.ts index d34fc69..b2198ee 100644 --- a/src/api/pdf.ts +++ b/src/api/pdf.ts @@ -125,6 +125,26 @@ export interface SaveOptions { * @default false */ subsetFonts?: boolean; + + /** + * Compress uncompressed streams with FlateDecode (default: true). + * + * When enabled, streams without a /Filter entry will be compressed + * before writing. Streams that already have filters (including image + * formats like DCTDecode/JPXDecode) are left unchanged. + */ + compressStreams?: boolean; + + /** + * Minimum stream size in bytes to attempt compression (default: 512). + * + * Streams smaller than this threshold are written uncompressed. + * Deflate initialization has a fixed overhead that dominates for small + * payloads, and tiny streams rarely achieve meaningful compression. + * + * Set to 0 to compress all streams regardless of size. + */ + compressionThreshold?: number; } /** @@ -3141,6 +3161,8 @@ export class PDF { id: fileId, useXRefStream, securityHandler, + compressStreams: options.compressStreams, + compressionThreshold: options.compressionThreshold, }); // Reset pending security state after successful save @@ -3158,6 +3180,8 @@ export class PDF { id: fileId, useXRefStream, securityHandler, + compressStreams: options.compressStreams, + compressionThreshold: options.compressionThreshold, }); // Reset pending security state after successful save diff --git a/src/helpers/buffer.ts b/src/helpers/buffer.ts index 2aab629..d5bdded 100644 --- a/src/helpers/buffer.ts +++ b/src/helpers/buffer.ts @@ -43,6 +43,13 @@ export function concatBytes(arrays: Uint8Array[]): Uint8Array { return result; } +/** Pre-computed hex lookup: byte value → "XX" uppercase string. */ +export const HEX_TABLE: string[] = new Array(256); + +for (let i = 0; i < 256; i++) { + HEX_TABLE[i] = i.toString(16).toUpperCase().padStart(2, "0"); +} + /** * Convert bytes to uppercase hex string. 
* @@ -58,7 +65,7 @@ export function bytesToHex(bytes: Uint8Array): string { let hex = ""; for (const byte of bytes) { - hex += byte.toString(16).toUpperCase().padStart(2, "0"); + hex += HEX_TABLE[byte]; } return hex; diff --git a/src/objects/pdf-name.ts b/src/objects/pdf-name.ts index 7ba0dc8..d52561c 100644 --- a/src/objects/pdf-name.ts +++ b/src/objects/pdf-name.ts @@ -1,3 +1,4 @@ +import { HEX_TABLE } from "#src/helpers/buffer"; import { CHAR_HASH, DELIMITERS, WHITESPACE } from "#src/helpers/chars"; import { LRUCache } from "#src/helpers/lru-cache"; import type { ByteWriter } from "#src/io/byte-writer"; @@ -9,11 +10,25 @@ import type { PdfPrimitive } from "./pdf-primitive"; // Plus anything outside printable ASCII (33-126) const NAME_NEEDS_ESCAPE = new Set([...WHITESPACE, ...DELIMITERS, CHAR_HASH]); +/** Module-level encoder — avoids constructing one per escapeName call. */ +const textEncoder = new TextEncoder(); + /** - * Default cache size for PdfName interning. - * Can be overridden via PdfName.setCacheSize(). + * Check whether a name is pure "safe" ASCII — every char is printable ASCII + * (33–126) and not in the escape set. If so, no escaping is needed and we + * can skip the TextEncoder entirely. */ -const DEFAULT_NAME_CACHE_SIZE = 10000; +function isSimpleAsciiName(name: string): boolean { + for (let i = 0; i < name.length; i++) { + const c = name.charCodeAt(i); + + if (c < 33 || c > 126 || NAME_NEEDS_ESCAPE.has(c)) { + return false; + } + } + + return true; +} /** * Escape a PDF name for serialization. 
@@ -24,15 +39,18 @@ const DEFAULT_NAME_CACHE_SIZE = 10000; * - The # character itself */ function escapeName(name: string): string { - const encoder = new TextEncoder(); - const bytes = encoder.encode(name); + // Fast path: pure safe ASCII — no encoding or escaping needed + if (isSimpleAsciiName(name)) { + return name; + } + + const bytes = textEncoder.encode(name); let result = ""; for (const byte of bytes) { if (byte < 33 || byte > 126 || NAME_NEEDS_ESCAPE.has(byte)) { - // Use hex escape - result += `#${byte.toString(16).toUpperCase().padStart(2, "0")}`; + result += `#${HEX_TABLE[byte]}`; } else { result += String.fromCharCode(byte); } @@ -41,6 +59,12 @@ function escapeName(name: string): string { return result; } +/** + * Default cache size for PdfName interning. + * Can be overridden via PdfName.setCacheSize(). + */ +const DEFAULT_NAME_CACHE_SIZE = 10000; + /** * PDF name object (interned). * @@ -80,6 +104,9 @@ export class PdfName implements PdfPrimitive { static readonly Filter = PdfName.createPermanent("Filter"); static readonly FlateDecode = PdfName.createPermanent("FlateDecode"); + /** Cached serialized form (e.g. "/Type"). Computed lazily on first toBytes(). */ + private cachedBytes: Uint8Array | null = null; + private constructor(readonly value: string) {} /** @@ -124,7 +151,17 @@ export class PdfName implements PdfPrimitive { } toBytes(writer: ByteWriter): void { - writer.writeAscii(`/${escapeName(this.value)}`); + let bytes = this.cachedBytes; + + if (bytes === null) { + const escaped = escapeName(this.value); + + bytes = textEncoder.encode(`/${escaped}`); + + this.cachedBytes = bytes; + } + + writer.writeBytes(bytes); } /** diff --git a/src/writer/pdf-writer.ts b/src/writer/pdf-writer.ts index 27469df..371a073 100644 --- a/src/writer/pdf-writer.ts +++ b/src/writer/pdf-writer.ts @@ -54,6 +54,18 @@ export interface WriteOptions { */ compressStreams?: boolean; + /** + * Minimum stream size in bytes to attempt compression (default: 512). 
+ * + * Streams smaller than this threshold are written uncompressed. + * Deflate initialization has a fixed cost (~0.023ms for pako's 64KB + * hash table) that dominates for small payloads, and tiny streams + * rarely achieve meaningful compression. + * + * Set to 0 to compress all streams regardless of size. + */ + compressionThreshold?: number; + /** * Security handler for encrypting content. * @@ -106,7 +118,13 @@ function writeIndirectObject(writer: ByteWriter, ref: PdfRef, obj: PdfObject): v * Streams that already have filters are returned unchanged - this includes * image formats (DCTDecode, JPXDecode, etc.) that are already compressed. */ -function prepareObjectForWrite(obj: PdfObject, compress: boolean): PdfObject { +const DEFAULT_COMPRESSION_THRESHOLD = 512; + +function prepareObjectForWrite( + obj: PdfObject, + compress: boolean, + compressionThreshold: number, +): PdfObject { // Only process streams if (!(obj instanceof PdfStream)) { return obj; @@ -122,8 +140,11 @@ function prepareObjectForWrite(obj: PdfObject, compress: boolean): PdfObject { return obj; } - // Empty streams don't need compression - if (obj.data.length === 0) { + // Pako's deflate initialization zeros a 64KB hash table on every call + // (~0.023ms). For streams below the threshold the compression savings + // are negligible relative to the init cost, especially when writing + // many PDFs (e.g. splitting 2000 pages). + if (obj.data.length < compressionThreshold) { return obj; } @@ -322,6 +343,7 @@ function collectReachableRefs( export function writeComplete(registry: ObjectRegistry, options: WriteOptions): WriteResult { const writer = new ByteWriter(); const compress = options.compressStreams ?? true; + const threshold = options.compressionThreshold ?? DEFAULT_COMPRESSION_THRESHOLD; // Version const version = options.version ?? 
"1.7"; @@ -346,7 +368,7 @@ export function writeComplete(registry: ObjectRegistry, options: WriteOptions): continue; // Skip orphan objects } // Prepare object (compress streams if needed) - let prepared = prepareObjectForWrite(obj, compress); + let prepared = prepareObjectForWrite(obj, compress, threshold); // Apply encryption if security handler is provided // Skip encrypting the /Encrypt dictionary itself @@ -467,6 +489,7 @@ export function writeIncremental( } const compress = options.compressStreams ?? true; + const threshold = options.compressionThreshold ?? DEFAULT_COMPRESSION_THRESHOLD; // Initialize ByteWriter with original bytes const writer = new ByteWriter(options.originalBytes); @@ -483,7 +506,7 @@ export function writeIncremental( // Write modified objects for (const [ref, obj] of changes.modified) { - let prepared = prepareObjectForWrite(obj, compress); + let prepared = prepareObjectForWrite(obj, compress, threshold); // Apply encryption if security handler is provided // Skip encrypting the /Encrypt dictionary itself @@ -505,7 +528,7 @@ export function writeIncremental( // Write new objects for (const [ref, obj] of changes.created) { - let prepared = prepareObjectForWrite(obj, compress); + let prepared = prepareObjectForWrite(obj, compress, threshold); // Apply encryption if security handler is provided // Skip encrypting the /Encrypt dictionary itself From 009a6145a6b2c0aa6078f1d418061f59773f0654 Mon Sep 17 00:00:00 2001 From: Lucas Smith Date: Wed, 18 Feb 2026 13:47:42 +1100 Subject: [PATCH 5/9] ci: add PR benchmark comparison workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Runs splitting benchmarks on both base and PR branches when .ts files are changed. Posts a comparison table as a sticky PR comment showing per-benchmark speedup/regression with 🟢/🔴 indicators at ±5% threshold. 
--- .github/workflows/bench-pr.yml | 62 ++++++++++++++ scripts/bench-compare.ts | 150 +++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 .github/workflows/bench-pr.yml create mode 100644 scripts/bench-compare.ts diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml new file mode 100644 index 0000000..9321de1 --- /dev/null +++ b/.github/workflows/bench-pr.yml @@ -0,0 +1,62 @@ +name: PR Benchmarks + +on: + pull_request: + branches: [main] + paths: + - "src/**/*.ts" + - "benchmarks/**/*.ts" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: write + +jobs: + bench: + name: Benchmark Comparison + runs-on: ubuntu-latest + steps: + - name: Checkout PR + uses: actions/checkout@v4 + with: + path: pr + + - name: Checkout base + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.base.sha }} + path: base + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + + - name: Install dependencies (base) + run: bun install --frozen-lockfile + working-directory: base + + - name: Install dependencies (PR) + run: bun install --frozen-lockfile + working-directory: pr + + - name: Run benchmarks (base) + run: bun run bench -- --outputJson ../base-results.json benchmarks/splitting.bench.ts + working-directory: base + continue-on-error: true + + - name: Run benchmarks (PR) + run: bun run bench -- --outputJson ../pr-results.json benchmarks/splitting.bench.ts + working-directory: pr + + - name: Generate comparison + run: bun run pr/scripts/bench-compare.ts ../base-results.json ../pr-results.json bench-comment.md + working-directory: pr + + - name: Post or update PR comment + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: benchmark-results + path: pr/bench-comment.md diff --git a/scripts/bench-compare.ts b/scripts/bench-compare.ts new file mode 100644 index 0000000..2be007e --- /dev/null +++ 
b/scripts/bench-compare.ts @@ -0,0 +1,150 @@ +/** + * Compare two benchmark JSON files and produce a markdown summary. + * + * Usage: + * bun run scripts/bench-compare.ts + * + * If does not exist, outputs PR-only results. + */ + +import { existsSync, readFileSync } from "node:fs"; + +interface Bench { + name: string; + mean: number; + hz: number; + p99: number; + rme: number; + sampleCount: number; +} + +interface Group { + fullName: string; + benchmarks: Bench[]; +} + +interface File { + filepath: string; + groups: Group[]; +} + +interface Output { + files: File[]; +} + +function buildMap(data: Output): Map { + const map = new Map(); + + for (const file of data.files) { + for (const group of file.groups) { + for (const b of group.benchmarks) { + map.set(b.name, b); + } + } + } + + return map; +} + +function formatMs(ms: number): string { + if (ms >= 1000) { + return `${(ms / 1000).toFixed(2)}s`; + } + + if (ms >= 1) { + return `${ms.toFixed(2)}ms`; + } + + return `${(ms * 1000).toFixed(0)}μs`; +} + +// ───────────────────────────────────────────────────────────────────────────── + +const [basePath, prPath, outputPath] = process.argv.slice(2); + +if (!prPath || !outputPath) { + console.error("Usage: bun run scripts/bench-compare.ts "); + process.exit(1); +} + +const pr: Output = JSON.parse(readFileSync(prPath, "utf-8")); +const prMap = buildMap(pr); +const hasBase = existsSync(basePath); + +const lines: string[] = []; +lines.push("## Benchmark Results"); +lines.push(""); + +if (hasBase) { + const base: Output = JSON.parse(readFileSync(basePath, "utf-8")); + const baseMap = buildMap(base); + + lines.push("| Benchmark | Base | PR | Change |"); + lines.push("|:---|---:|---:|---:|"); + + let hasRegression = false; + + for (const [name, b] of baseMap) { + const p = prMap.get(name); + + if (!p) { + continue; + } + + const ratio = b.mean / p.mean; + let change: string; + let indicator = ""; + + if (ratio > 1.05) { + change = `${ratio.toFixed(2)}x faster`; + indicator = " 
🟢"; + } else if (ratio < 0.95) { + change = `${(1 / ratio).toFixed(2)}x slower`; + indicator = " 🔴"; + hasRegression = true; + } else { + change = "~same"; + } + + lines.push(`| ${name} | ${formatMs(b.mean)} | ${formatMs(p.mean)} | ${change}${indicator} |`); + } + + // Show benchmarks only in PR (new benchmarks) + for (const [name, p] of prMap) { + if (!baseMap.has(name)) { + lines.push(`| ${name} | — | ${formatMs(p.mean)} | *new* |`); + } + } + + lines.push(""); + + if (hasRegression) { + lines.push("> ⚠️ **Performance regression detected.** Please review the changes above."); + } else { + lines.push("> ✅ No performance regressions detected."); + } +} else { + // No base results — just show PR numbers + lines.push("*No base benchmarks available for comparison (new benchmark suite?).*"); + lines.push(""); + lines.push("| Benchmark | Mean | p99 | Samples |"); + lines.push("|:---|---:|---:|---:|"); + + for (const [name, p] of prMap) { + lines.push(`| ${name} | ${formatMs(p.mean)} | ${formatMs(p.p99)} | ${p.sampleCount} |`); + } +} + +lines.push(""); +lines.push( + `
Environment\n\n` + + `- Runner: \`ubuntu-latest\`\n` + + `- Runtime: Bun ${process.versions.bun}\n` + + `- Benchmark: \`benchmarks/splitting.bench.ts\`\n\n` + + `*Results are machine-dependent. Thresholds: >5% faster 🟢, >5% slower 🔴.*\n` + + `
`, +); + +const body = lines.join("\n"); +await Bun.write(outputPath, body); +console.log(body); From ca39f3ec49dc096b5da76ffeecafa681d49d010f Mon Sep 17 00:00:00 2001 From: Lucas Smith Date: Wed, 18 Feb 2026 13:54:59 +1100 Subject: [PATCH 6/9] chore: tidy baselines remove local report --- reports/baselines/baseline.json | 224 ----------------- reports/baselines/comparison-npm-lru-cache.md | 39 --- reports/baselines/comparison-sync-copier.md | 28 --- reports/baselines/escape-name-opt.json | 224 ----------------- reports/baselines/npm-lru-cache.json | 224 ----------------- reports/baselines/skip-tiny-deflate.json | 224 ----------------- reports/baselines/sync-copier.json | 224 ----------------- reports/benchmarks.md | 233 +++++++++--------- 8 files changed, 121 insertions(+), 1299 deletions(-) delete mode 100644 reports/baselines/baseline.json delete mode 100644 reports/baselines/comparison-npm-lru-cache.md delete mode 100644 reports/baselines/comparison-sync-copier.md delete mode 100644 reports/baselines/escape-name-opt.json delete mode 100644 reports/baselines/npm-lru-cache.json delete mode 100644 reports/baselines/skip-tiny-deflate.json delete mode 100644 reports/baselines/sync-copier.json diff --git a/reports/baselines/baseline.json b/reports/baselines/baseline.json deleted file mode 100644 index bae2d4c..0000000 --- a/reports/baselines/baseline.json +++ /dev/null @@ -1,224 +0,0 @@ -{ - "files": [ - { - "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", - "groups": [ - { - "fullName": "benchmarks/splitting.bench.ts > Extract single page", - "benchmarks": [ - { - "id": "-1072438084_0_0", - "name": "extractPages (1 page from small PDF)", - "rank": 1, - "rme": 1.598586232947195, - "samples": [], - "totalTime": 500.1328439999987, - "min": 0.3573750000000473, - "max": 2.4449999999999363, - "hz": 2215.411391778147, - "period": 0.45138343321299523, - "mean": 0.45138343321299523, - "variance": 0.015017269876550877, - "sd": 0.12254497083336745, - "sem": 
0.0036815068475241517, - "df": 1107, - "critical": 1.96, - "moe": 0.007215753421147338, - "p75": 0.4394999999999527, - "p99": 0.8506250000000364, - "p995": 0.9532910000000356, - "p999": 1.7487909999999829, - "sampleCount": 1108, - "median": 0.4120829999999387 - }, - { - "id": "-1072438084_0_1", - "name": "extractPages (1 page from 100-page PDF)", - "rank": 2, - "rme": 1.14326877952979, - "samples": [], - "totalTime": 500.9256660000017, - "min": 1.502375000000029, - "max": 2.347500000000082, - "hz": 560.9614740722809, - "period": 1.782653615658369, - "mean": 1.782653615658369, - "variance": 0.03038259004746126, - "sd": 0.17430602412843127, - "sem": 0.010398225630092402, - "df": 280, - "critical": 1.96, - "moe": 0.02038052223498111, - "p75": 1.8196669999999813, - "p99": 2.319291000000021, - "p995": 2.3412920000000668, - "p999": 2.347500000000082, - "sampleCount": 281, - "median": 1.7196670000000722 - }, - { - "id": "-1072438084_0_2", - "name": "extractPages (1 page from 2000-page PDF)", - "rank": 3, - "rme": 2.6124445200935758, - "samples": [], - "totalTime": 531.2322489999992, - "min": 37.85783300000003, - "max": 44.21758299999965, - "hz": 24.471405914214404, - "period": 40.864019153846094, - "mean": 40.864019153846094, - "variance": 3.120363595900889, - "sd": 1.7664550930892324, - "sem": 0.4899264933798263, - "df": 12, - "critical": 2.179, - "moe": 1.0675498290746415, - "p75": 42.14979199999971, - "p99": 44.21758299999965, - "p995": 44.21758299999965, - "p999": 44.21758299999965, - "sampleCount": 13, - "median": 40.22212499999978 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", - "benchmarks": [ - { - "id": "-1072438084_1_0", - "name": "split 100-page PDF (0.1MB)", - "rank": 1, - "rme": 1.8856591767197517, - "samples": [], - "totalTime": 506.2218300000013, - "min": 30.098917000000256, - "max": 34.55699999999979, - "hz": 31.60669700870063, - "period": 31.638864375000082, - "mean": 31.638864375000082, - "variance": 
1.2540684871776795, - "sd": 1.119851993424881, - "sem": 0.27996299835622024, - "df": 15, - "critical": 2.131, - "moe": 0.5966011494971053, - "p75": 32.05224999999973, - "p99": 34.55699999999979, - "p995": 34.55699999999979, - "p999": 34.55699999999979, - "sampleCount": 16, - "median": 31.37264550000009 - }, - { - "id": "-1072438084_1_1", - "name": "split 2000-page PDF (0.9MB)", - "rank": 2, - "rme": 0, - "samples": [], - "totalTime": 582.5043749999995, - "min": 582.5043749999995, - "max": 582.5043749999995, - "hz": 1.716725303565318, - "period": 582.5043749999995, - "mean": 582.5043749999995, - "variance": 0, - "sd": 0, - "sem": 0, - "df": 0, - "critical": 12.71, - "moe": 0, - "p75": 582.5043749999995, - "p99": 582.5043749999995, - "p995": 582.5043749999995, - "p999": 582.5043749999995, - "sampleCount": 1, - "median": 582.5043749999995 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", - "benchmarks": [ - { - "id": "-1072438084_2_0", - "name": "extract first 10 pages from 2000-page PDF", - "rank": 1, - "rme": 3.094676818514817, - "samples": [], - "totalTime": 514.4472479999995, - "min": 39.471832999999606, - "max": 46.71299999999974, - "hz": 23.326006790107296, - "period": 42.87060399999996, - "mean": 42.87060399999996, - "variance": 4.360044973374979, - "sd": 2.0880720709245115, - "sem": 0.602774486117803, - "df": 11, - "critical": 2.201, - "moe": 1.3267066439452846, - "p75": 43.80012500000066, - "p99": 46.71299999999974, - "p995": 46.71299999999974, - "p999": 46.71299999999974, - "sampleCount": 12, - "median": 42.56633349999993 - }, - { - "id": "-1072438084_2_1", - "name": "extract first 100 pages from 2000-page PDF", - "rank": 2, - "rme": 3.2071455847378436, - "samples": [], - "totalTime": 509.16504200000236, - "min": 48.13025000000016, - "max": 54.86754199999996, - "hz": 19.639997201535987, - "period": 50.91650420000023, - "mean": 50.91650420000023, - "variance": 5.211578755405502, - "sd": 2.282888248558282, - "sem": 
0.7219126509076774, - "df": 9, - "critical": 2.262, - "moe": 1.6329664163531663, - "p75": 52.17775000000074, - "p99": 54.86754199999996, - "p995": 54.86754199999996, - "p999": 54.86754199999996, - "sampleCount": 10, - "median": 51.348021000000244 - }, - { - "id": "-1072438084_2_2", - "name": "extract every 10th page from 2000-page PDF (200 pages)", - "rank": 3, - "rme": 4.394613298511672, - "samples": [], - "totalTime": 529.0494579999995, - "min": 54.37287500000002, - "max": 64.11266699999942, - "hz": 17.011642038200534, - "period": 58.78327311111106, - "mean": 58.78327311111106, - "variance": 11.294649634264848, - "sd": 3.360751349663472, - "sem": 1.120250449887824, - "df": 8, - "critical": 2.306, - "moe": 2.5832975374413225, - "p75": 60.755959000000075, - "p99": 64.11266699999942, - "p995": 64.11266699999942, - "p999": 64.11266699999942, - "sampleCount": 9, - "median": 57.71983300000011 - } - ] - } - ] - } - ] -} diff --git a/reports/baselines/comparison-npm-lru-cache.md b/reports/baselines/comparison-npm-lru-cache.md deleted file mode 100644 index 6bba72c..0000000 --- a/reports/baselines/comparison-npm-lru-cache.md +++ /dev/null @@ -1,39 +0,0 @@ -# Benchmark Comparison: npm lru-cache - -## Change - -Replaced internal LRU cache (Map delete+set on every get) with the npm `lru-cache` -package (v11, doubly-linked-list, O(1) get/set without Map rehashing). 
- -## Results (vs original baseline) - -| Benchmark | Baseline (ms) | Sync Copier (ms) | + npm lru-cache (ms) | Total Speedup | -| :----------------------------------------------------- | ------------: | ---------------: | -------------------: | ------------: | -| extractPages (1 page from small PDF) | 0.45 | 0.40 | 0.38 | 1.17x | -| extractPages (1 page from 100-page PDF) | 1.78 | 1.74 | 1.66 | 1.07x | -| extractPages (1 page from 2000-page PDF) | 40.86 | 41.04 | 25.50 | 1.60x | -| split 100-page PDF (0.1MB) | 31.64 | 27.35 | 24.59 | 1.29x | -| split 2000-page PDF (0.9MB) | 582.50 | 506.61 | 432.27 | 1.35x | -| extract first 10 pages from 2000-page PDF | 42.87 | 43.21 | 27.31 | 1.57x | -| extract first 100 pages from 2000-page PDF | 50.92 | 53.87 | 35.21 | 1.45x | -| extract every 10th page from 2000-page PDF (200 pages) | 58.78 | 56.65 | 40.52 | 1.45x | - -## Incremental improvement (sync copier -> + npm lru-cache) - -| Benchmark | Sync Copier (ms) | + npm lru-cache (ms) | Incremental Speedup | -| :----------------------------------------------------- | ---------------: | -------------------: | ------------------: | -| extractPages (1 page from small PDF) | 0.40 | 0.38 | 1.03x | -| extractPages (1 page from 100-page PDF) | 1.74 | 1.66 | 1.05x | -| extractPages (1 page from 2000-page PDF) | 41.04 | 25.50 | 1.61x | -| split 100-page PDF (0.1MB) | 27.35 | 24.59 | 1.11x | -| split 2000-page PDF (0.9MB) | 506.61 | 432.27 | 1.17x | -| extract first 10 pages from 2000-page PDF | 43.21 | 27.31 | 1.58x | -| extract first 100 pages from 2000-page PDF | 53.87 | 35.21 | 1.53x | -| extract every 10th page from 2000-page PDF (200 pages) | 56.65 | 40.52 | 1.40x | - -## Key Takeaways - -- **2000-page split**: 582.5ms -> 432.3ms (1.35x faster total) -- **100-page split**: 31.6ms -> 24.6ms (1.29x faster total) -- **Single page from 2000p**: 40.9ms -> 25.5ms (1.60x faster total) -- **npm lru-cache incremental gain on 2000-page split**: 506.6ms -> 432.3ms (1.17x) diff --git 
a/reports/baselines/comparison-sync-copier.md b/reports/baselines/comparison-sync-copier.md deleted file mode 100644 index 8752df2..0000000 --- a/reports/baselines/comparison-sync-copier.md +++ /dev/null @@ -1,28 +0,0 @@ -# Benchmark Comparison: Sync ObjectCopier - -## Change - -Removed async/await from all ObjectCopier methods. The copier does zero I/O — -every method was async but never awaited anything asynchronous. Removing the -async overhead eliminates microtask scheduling on every recursive call in the -deep-copy graph walk. - -## Results - -| Benchmark | Baseline (ms) | Sync (ms) | Speedup | -| :----------------------------------------------------- | ------------: | --------: | ------: | -| extractPages (1 page from small PDF) | 0.45 | 0.40 | 1.14x | -| extractPages (1 page from 100-page PDF) | 1.78 | 1.74 | 1.03x | -| extractPages (1 page from 2000-page PDF) | 40.86 | 41.04 | 1.00x | -| split 100-page PDF (0.1MB) | 31.64 | 27.35 | 1.16x | -| split 2000-page PDF (0.9MB) | 582.50 | 506.61 | 1.15x | -| extract first 10 pages from 2000-page PDF | 42.87 | 43.21 | 0.99x | -| extract first 100 pages from 2000-page PDF | 50.92 | 53.87 | 0.95x | -| extract every 10th page from 2000-page PDF (200 pages) | 58.78 | 56.65 | 1.04x | - -## Key Takeaways - -- **100-page split**: 31.6ms -> 27.3ms (1.16x faster) -- **2000-page split**: 582.5ms -> 506.6ms (1.15x faster) -- Single-page extraction from small PDFs: ~14% faster (0.45ms -> 0.40ms) -- Batch extraction noise is within margin of error for single runs diff --git a/reports/baselines/escape-name-opt.json b/reports/baselines/escape-name-opt.json deleted file mode 100644 index 55cfe9e..0000000 --- a/reports/baselines/escape-name-opt.json +++ /dev/null @@ -1,224 +0,0 @@ -{ - "files": [ - { - "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", - "groups": [ - { - "fullName": "benchmarks/splitting.bench.ts > Extract single page", - "benchmarks": [ - { - "id": "-1072438084_0_0", - "name": "extractPages (1 
page from small PDF)", - "rank": 1, - "rme": 1.3598162943493441, - "samples": [], - "totalTime": 500.19283100000087, - "min": 0.3138329999999314, - "max": 2.999708000000055, - "hz": 2802.9190206446556, - "period": 0.35677092082739004, - "mean": 0.35677092082739004, - "variance": 0.008589645712391514, - "sd": 0.09268034156384791, - "sem": 0.002475218936179106, - "df": 1401, - "critical": 1.96, - "moe": 0.004851429114911048, - "p75": 0.3457499999999527, - "p99": 0.5737500000000182, - "p995": 0.6079170000000431, - "p999": 0.9181670000000395, - "sampleCount": 1402, - "median": 0.333729500000004 - }, - { - "id": "-1072438084_0_1", - "name": "extractPages (1 page from 100-page PDF)", - "rank": 2, - "rme": 0.5969408576151285, - "samples": [], - "totalTime": 500.4518780000001, - "min": 1.4726660000001175, - "max": 2.0149169999999685, - "hz": 633.4275360637171, - "period": 1.5787125488958993, - "mean": 1.5787125488958993, - "variance": 0.007328512823299263, - "sd": 0.0856067335161158, - "sem": 0.0048081531778861415, - "df": 316, - "critical": 1.96, - "moe": 0.009423980228656837, - "p75": 1.589292000000114, - "p99": 1.8592499999999745, - "p995": 1.8713330000000497, - "p999": 2.0149169999999685, - "sampleCount": 317, - "median": 1.55074999999988 - }, - { - "id": "-1072438084_0_2", - "name": "extractPages (1 page from 2000-page PDF)", - "rank": 3, - "rme": 0.8810702753157642, - "samples": [], - "totalTime": 503.6995829999996, - "min": 24.38437500000009, - "max": 26.65845900000022, - "hz": 39.70620718183127, - "period": 25.184979149999982, - "mean": 25.184979149999982, - "variance": 0.2247997529768304, - "sd": 0.47413052314402876, - "sem": 0.10601880799575855, - "df": 19, - "critical": 2.093, - "moe": 0.22189736513512265, - "p75": 25.31841600000007, - "p99": 26.65845900000022, - "p995": 26.65845900000022, - "p999": 26.65845900000022, - "sampleCount": 20, - "median": 25.21672899999976 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", - 
"benchmarks": [ - { - "id": "-1072438084_1_0", - "name": "split 100-page PDF (0.1MB)", - "rank": 1, - "rme": 1.7048381748393946, - "samples": [], - "totalTime": 502.2399970000006, - "min": 19.57883300000003, - "max": 23.024417000000085, - "hz": 47.78591936794706, - "period": 20.92666654166669, - "mean": 20.92666654166669, - "variance": 0.7136030401800851, - "sd": 0.8447502827345399, - "sem": 0.17243392939762045, - "df": 23, - "critical": 2.069, - "moe": 0.3567657999236767, - "p75": 21.305459000000155, - "p99": 23.024417000000085, - "p995": 23.024417000000085, - "p999": 23.024417000000085, - "sampleCount": 24, - "median": 20.864958000000115 - }, - { - "id": "-1072438084_1_1", - "name": "split 2000-page PDF (0.9MB)", - "rank": 2, - "rme": 0, - "samples": [], - "totalTime": 369.2785839999997, - "min": 369.2785839999997, - "max": 369.2785839999997, - "hz": 2.7079826540929353, - "period": 369.2785839999997, - "mean": 369.2785839999997, - "variance": 0, - "sd": 0, - "sem": 0, - "df": 0, - "critical": 12.71, - "moe": 0, - "p75": 369.2785839999997, - "p99": 369.2785839999997, - "p995": 369.2785839999997, - "p999": 369.2785839999997, - "sampleCount": 1, - "median": 369.2785839999997 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", - "benchmarks": [ - { - "id": "-1072438084_2_0", - "name": "extract first 10 pages from 2000-page PDF", - "rank": 1, - "rme": 2.2077471741058514, - "samples": [], - "totalTime": 511.5155429999995, - "min": 25.185083999999733, - "max": 31.580417000000125, - "hz": 37.14452133471146, - "period": 26.9218706842105, - "mean": 26.9218706842105, - "variance": 1.5205847790653222, - "sd": 1.2331199370155859, - "sem": 0.2828971153008515, - "df": 18, - "critical": 2.101, - "moe": 0.594366839247089, - "p75": 27.052708000000166, - "p99": 31.580417000000125, - "p995": 31.580417000000125, - "p999": 31.580417000000125, - "sampleCount": 19, - "median": 26.722999999999956 - }, - { - "id": "-1072438084_2_1", - "name": "extract 
first 100 pages from 2000-page PDF", - "rank": 2, - "rme": 1.8050175681822698, - "samples": [], - "totalTime": 517.9517489999989, - "min": 33.36449999999968, - "max": 37.865499999999884, - "hz": 28.960226563497965, - "period": 34.53011659999993, - "mean": 34.53011659999993, - "variance": 1.2664720864594134, - "sd": 1.1253764198966556, - "sem": 0.2905709421649515, - "df": 14, - "critical": 2.145, - "moe": 0.6232746709438209, - "p75": 35.07374999999956, - "p99": 37.865499999999884, - "p995": 37.865499999999884, - "p999": 37.865499999999884, - "sampleCount": 15, - "median": 34.1270829999994 - }, - { - "id": "-1072438084_2_2", - "name": "extract every 10th page from 2000-page PDF (200 pages)", - "rank": 3, - "rme": 3.7908921701889815, - "samples": [], - "totalTime": 528.9624159999994, - "min": 37.919458000000304, - "max": 48.4839579999998, - "hz": 24.576415274086347, - "period": 40.689416615384566, - "mean": 40.689416615384566, - "variance": 6.5144038865667575, - "sd": 2.552333028146358, - "sem": 0.707889815772459, - "df": 12, - "critical": 2.179, - "moe": 1.542491908568188, - "p75": 40.70625000000018, - "p99": 48.4839579999998, - "p995": 48.4839579999998, - "p999": 48.4839579999998, - "sampleCount": 13, - "median": 40.056916999999885 - } - ] - } - ] - } - ] -} diff --git a/reports/baselines/npm-lru-cache.json b/reports/baselines/npm-lru-cache.json deleted file mode 100644 index 346c580..0000000 --- a/reports/baselines/npm-lru-cache.json +++ /dev/null @@ -1,224 +0,0 @@ -{ - "files": [ - { - "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", - "groups": [ - { - "fullName": "benchmarks/splitting.bench.ts > Extract single page", - "benchmarks": [ - { - "id": "-1072438084_0_0", - "name": "extractPages (1 page from small PDF)", - "rank": 1, - "rme": 0.6803617904005139, - "samples": [], - "totalTime": 500.2104360000012, - "min": 0.31749999999988177, - "max": 0.7807909999999083, - "hz": 2598.9061931526694, - "period": 0.3847772584615394, - "mean": 
0.3847772584615394, - "variance": 0.002319155302547339, - "sd": 0.04815760897872048, - "sem": 0.0013356517575117053, - "df": 1299, - "critical": 1.96, - "moe": 0.0026178774447229423, - "p75": 0.38512500000001637, - "p99": 0.615833000000066, - "p995": 0.6623329999999896, - "p999": 0.7236249999998563, - "sampleCount": 1300, - "median": 0.37283300000012787 - }, - { - "id": "-1072438084_0_1", - "name": "extractPages (1 page from 100-page PDF)", - "rank": 2, - "rme": 0.787155411012552, - "samples": [], - "totalTime": 500.06587099999865, - "min": 1.5184580000000096, - "max": 2.1133749999999054, - "hz": 601.92070176291, - "period": 1.6613484086378694, - "mean": 1.6613484086378694, - "variance": 0.013399752069859515, - "sd": 0.11575729812784814, - "sem": 0.006672139742022405, - "df": 300, - "critical": 1.96, - "moe": 0.013077393894363914, - "p75": 1.7012500000000728, - "p99": 2.0060419999999795, - "p995": 2.059125000000222, - "p999": 2.1133749999999054, - "sampleCount": 301, - "median": 1.6227090000002136 - }, - { - "id": "-1072438084_0_2", - "name": "extractPages (1 page from 2000-page PDF)", - "rank": 3, - "rme": 1.172293974657617, - "samples": [], - "totalTime": 509.9992109999989, - "min": 24.20520899999974, - "max": 26.485957999999755, - "hz": 39.215746943577216, - "period": 25.499960549999948, - "mean": 25.499960549999948, - "variance": 0.40798446042404196, - "sd": 0.6387366127161038, - "sem": 0.14282584857511646, - "df": 19, - "critical": 2.093, - "moe": 0.29893450106771874, - "p75": 26.051500000000033, - "p99": 26.485957999999755, - "p995": 26.485957999999755, - "p999": 26.485957999999755, - "sampleCount": 20, - "median": 25.388771000000133 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", - "benchmarks": [ - { - "id": "-1072438084_1_0", - "name": "split 100-page PDF (0.1MB)", - "rank": 1, - "rme": 1.5707569902861633, - "samples": [], - "totalTime": 516.3171660000003, - "min": 23.525666999999885, - "max": 
26.712125000000015, - "hz": 40.672674439028796, - "period": 24.586531714285726, - "mean": 24.586531714285726, - "variance": 0.7197857225650164, - "sd": 0.8484018638387214, - "sem": 0.18513646479916948, - "df": 20, - "critical": 2.086, - "moe": 0.3861946655710675, - "p75": 24.975667000000158, - "p99": 26.712125000000015, - "p995": 26.712125000000015, - "p999": 26.712125000000015, - "sampleCount": 21, - "median": 24.294166000000132 - }, - { - "id": "-1072438084_1_1", - "name": "split 2000-page PDF (0.9MB)", - "rank": 2, - "rme": 0, - "samples": [], - "totalTime": 432.27162499999986, - "min": 432.27162499999986, - "max": 432.27162499999986, - "hz": 2.313360262774593, - "period": 432.27162499999986, - "mean": 432.27162499999986, - "variance": 0, - "sd": 0, - "sem": 0, - "df": 0, - "critical": 12.71, - "moe": 0, - "p75": 432.27162499999986, - "p99": 432.27162499999986, - "p995": 432.27162499999986, - "p999": 432.27162499999986, - "sampleCount": 1, - "median": 432.27162499999986 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", - "benchmarks": [ - { - "id": "-1072438084_2_0", - "name": "extract first 10 pages from 2000-page PDF", - "rank": 1, - "rme": 2.223598050281543, - "samples": [], - "totalTime": 518.8923730000015, - "min": 26.314790999999786, - "max": 30.981832999999824, - "hz": 36.61645649202854, - "period": 27.31012489473692, - "mean": 27.31012489473692, - "variance": 1.5873088504924244, - "sd": 1.2598844591836287, - "sem": 0.28903731779572794, - "df": 18, - "critical": 2.101, - "moe": 0.6072674046888245, - "p75": 27.280499999999847, - "p99": 30.981832999999824, - "p995": 30.981832999999824, - "p999": 30.981832999999824, - "sampleCount": 19, - "median": 26.98291700000027 - }, - { - "id": "-1072438084_2_1", - "name": "extract first 100 pages from 2000-page PDF", - "rank": 2, - "rme": 2.213921234147061, - "samples": [], - "totalTime": 528.2034549999998, - "min": 34.1123749999997, - "max": 39.86729100000048, - "hz": 
28.398148209765125, - "period": 35.21356366666666, - "mean": 35.21356366666666, - "variance": 1.9814401301546256, - "sd": 1.4076363628986805, - "sem": 0.3634501460681713, - "df": 14, - "critical": 2.145, - "moe": 0.7796005633162274, - "p75": 35.0894159999998, - "p99": 39.86729100000048, - "p995": 39.86729100000048, - "p999": 39.86729100000048, - "sampleCount": 15, - "median": 34.765957999999955 - }, - { - "id": "-1072438084_2_2", - "name": "extract every 10th page from 2000-page PDF (200 pages)", - "rank": 3, - "rme": 2.0448631675208837, - "samples": [], - "totalTime": 526.8241219999973, - "min": 38.62562499999967, - "max": 42.899374999999964, - "hz": 24.67616697323527, - "period": 40.52493246153825, - "mean": 40.52493246153825, - "variance": 1.8801911150990929, - "sd": 1.3712006108148773, - "sem": 0.38030262394158326, - "df": 12, - "critical": 2.179, - "moe": 0.8286794175687099, - "p75": 41.67979099999957, - "p99": 42.899374999999964, - "p995": 42.899374999999964, - "p999": 42.899374999999964, - "sampleCount": 13, - "median": 40.114832999999635 - } - ] - } - ] - } - ] -} diff --git a/reports/baselines/skip-tiny-deflate.json b/reports/baselines/skip-tiny-deflate.json deleted file mode 100644 index d11d64e..0000000 --- a/reports/baselines/skip-tiny-deflate.json +++ /dev/null @@ -1,224 +0,0 @@ -{ - "files": [ - { - "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", - "groups": [ - { - "fullName": "benchmarks/splitting.bench.ts > Extract single page", - "benchmarks": [ - { - "id": "-1072438084_0_0", - "name": "extractPages (1 page from small PDF)", - "rank": 1, - "rme": 0.8892829061609049, - "samples": [], - "totalTime": 500.31841499999973, - "min": 0.2905420000001868, - "max": 1.9279579999999896, - "hz": 2924.1378213112557, - "period": 0.3419811449077237, - "mean": 0.3419811449077237, - "variance": 0.0035222193288500063, - "sd": 0.05934828833968176, - "sem": 0.001551622379570276, - "df": 1462, - "critical": 1.96, - "moe": 0.0030411798639577406, - 
"p75": 0.3381249999999909, - "p99": 0.5579999999999927, - "p995": 0.5912919999998394, - "p999": 0.6483749999999873, - "sampleCount": 1463, - "median": 0.3292500000002292 - }, - { - "id": "-1072438084_0_1", - "name": "extractPages (1 page from 100-page PDF)", - "rank": 2, - "rme": 0.6942473447539961, - "samples": [], - "totalTime": 500.1834050000034, - "min": 1.3748750000004293, - "max": 1.868916999999783, - "hz": 669.7543274151564, - "period": 1.4930847910447862, - "mean": 1.4930847910447862, - "variance": 0.009369794423687357, - "sd": 0.09679769844209808, - "sem": 0.005288623222833765, - "df": 334, - "critical": 1.96, - "moe": 0.010365701516754179, - "p75": 1.5309159999997064, - "p99": 1.8027499999998327, - "p995": 1.8117079999997259, - "p999": 1.868916999999783, - "sampleCount": 335, - "median": 1.4698750000002292 - }, - { - "id": "-1072438084_0_2", - "name": "extractPages (1 page from 2000-page PDF)", - "rank": 3, - "rme": 1.1888586380169315, - "samples": [], - "totalTime": 501.3285840000003, - "min": 24.17533400000002, - "max": 27.005499999999756, - "hz": 39.89399495321812, - "period": 25.066429200000016, - "mean": 25.066429200000016, - "variance": 0.4054496384268641, - "sd": 0.6367492743826758, - "sem": 0.14238146621433284, - "df": 19, - "critical": 2.093, - "moe": 0.2980044087865986, - "p75": 25.286291999999776, - "p99": 27.005499999999756, - "p995": 27.005499999999756, - "p999": 27.005499999999756, - "sampleCount": 20, - "median": 24.951479000000063 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", - "benchmarks": [ - { - "id": "-1072438084_1_0", - "name": "split 100-page PDF (0.1MB)", - "rank": 1, - "rme": 1.241299549103146, - "samples": [], - "totalTime": 509.5840829999979, - "min": 12.014166999999816, - "max": 13.951500000000124, - "hz": 78.49538738438218, - "period": 12.739602074999947, - "mean": 12.739602074999947, - "variance": 0.2443938158093934, - "sd": 0.4943620290934503, - "sem": 0.07816550003188641, - 
"df": 39, - "critical": 2.0231, - "moe": 0.15813662311450938, - "p75": 12.96362500000032, - "p99": 13.951500000000124, - "p995": 13.951500000000124, - "p999": 13.951500000000124, - "sampleCount": 40, - "median": 12.703645499999766 - }, - { - "id": "-1072438084_1_1", - "name": "split 2000-page PDF (0.9MB)", - "rank": 2, - "rme": 0, - "samples": [], - "totalTime": 244.68245800000022, - "min": 244.68245800000022, - "max": 244.68245800000022, - "hz": 4.086929680917294, - "period": 244.68245800000022, - "mean": 244.68245800000022, - "variance": 0, - "sd": 0, - "sem": 0, - "df": 0, - "critical": 12.71, - "moe": 0, - "p75": 244.68245800000022, - "p99": 244.68245800000022, - "p995": 244.68245800000022, - "p999": 244.68245800000022, - "sampleCount": 1, - "median": 244.68245800000022 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", - "benchmarks": [ - { - "id": "-1072438084_2_0", - "name": "extract first 10 pages from 2000-page PDF", - "rank": 1, - "rme": 0.9557725912034225, - "samples": [], - "totalTime": 504.57903800000076, - "min": 24.360125000000153, - "max": 25.940999999999804, - "hz": 39.637001329413074, - "period": 25.228951900000038, - "mean": 25.228951900000038, - "variance": 0.2654600064528459, - "sd": 0.5152281110856102, - "sem": 0.11520850803062373, - "df": 19, - "critical": 2.093, - "moe": 0.24113140730809546, - "p75": 25.62262499999997, - "p99": 25.940999999999804, - "p995": 25.940999999999804, - "p999": 25.940999999999804, - "sampleCount": 20, - "median": 25.240332999999737 - }, - { - "id": "-1072438084_2_1", - "name": "extract first 100 pages from 2000-page PDF", - "rank": 2, - "rme": 0.8129752528372926, - "samples": [], - "totalTime": 509.13470899999993, - "min": 25.983500000000276, - "max": 28.018167000000176, - "hz": 37.31821787856149, - "period": 26.796563631578945, - "mean": 26.796563631578945, - "variance": 0.20427459388908475, - "sd": 0.45196746994566406, - "sem": 0.10368844880320557, - "df": 18, - "critical": 
2.101, - "moe": 0.2178494309355349, - "p75": 27.132666999999856, - "p99": 28.018167000000176, - "p995": 28.018167000000176, - "p999": 28.018167000000176, - "sampleCount": 19, - "median": 26.71374999999989 - }, - { - "id": "-1072438084_2_2", - "name": "extract every 10th page from 2000-page PDF (200 pages)", - "rank": 3, - "rme": 1.7260235531528858, - "samples": [], - "totalTime": 526.418208000001, - "min": 28.0747080000001, - "max": 31.5014170000004, - "hz": 34.19334613896935, - "period": 29.245456000000054, - "mean": 29.245456000000054, - "variance": 1.0301911737761424, - "sd": 1.014983336698757, - "sem": 0.23923386672368, - "df": 17, - "critical": 2.11, - "moe": 0.5047834587869647, - "p75": 29.7938750000003, - "p99": 31.5014170000004, - "p995": 31.5014170000004, - "p999": 31.5014170000004, - "sampleCount": 18, - "median": 28.999896000000717 - } - ] - } - ] - } - ] -} diff --git a/reports/baselines/sync-copier.json b/reports/baselines/sync-copier.json deleted file mode 100644 index 7b6bd2f..0000000 --- a/reports/baselines/sync-copier.json +++ /dev/null @@ -1,224 +0,0 @@ -{ - "files": [ - { - "filepath": "/Users/lucas/dev/libpdf/benchmarks/splitting.bench.ts", - "groups": [ - { - "fullName": "benchmarks/splitting.bench.ts > Extract single page", - "benchmarks": [ - { - "id": "-1072438084_0_0", - "name": "extractPages (1 page from small PDF)", - "rank": 1, - "rme": 0.8797965308418093, - "samples": [], - "totalTime": 500.17104999999685, - "min": 0.3400409999999283, - "max": 0.8508749999998599, - "hz": 2517.1388867868463, - "period": 0.39727644956314284, - "mean": 0.39727644956314284, - "variance": 0.004003720204126598, - "sd": 0.0632749571641625, - "sem": 0.0017832777658714494, - "df": 1258, - "critical": 1.96, - "moe": 0.0034952244211080407, - "p75": 0.3927090000001954, - "p99": 0.74350000000004, - "p995": 0.7761249999998654, - "p999": 0.8387500000000045, - "sampleCount": 1259, - "median": 0.38120900000012625 - }, - { - "id": "-1072438084_0_1", - "name": 
"extractPages (1 page from 100-page PDF)", - "rank": 2, - "rme": 0.9765406302412275, - "samples": [], - "totalTime": 500.3271310000009, - "min": 1.584292000000005, - "max": 2.372624999999971, - "hz": 575.6233914886368, - "period": 1.737246982638892, - "mean": 1.737246982638892, - "variance": 0.021576654723250537, - "sd": 0.14688994085113705, - "sem": 0.008655572771994156, - "df": 287, - "critical": 1.96, - "moe": 0.016964922633108545, - "p75": 1.756249999999909, - "p99": 2.2702919999999267, - "p995": 2.3456670000000486, - "p999": 2.372624999999971, - "sampleCount": 288, - "median": 1.6839165000000094 - }, - { - "id": "-1072438084_0_2", - "name": "extractPages (1 page from 2000-page PDF)", - "rank": 3, - "rme": 2.1980090898940152, - "samples": [], - "totalTime": 533.5146669999999, - "min": 38.59916599999997, - "max": 43.44508300000007, - "hz": 24.366715301568274, - "period": 41.039589769230766, - "mean": 41.039589769230766, - "variance": 2.227890706742653, - "sd": 1.4926120416044664, - "sem": 0.4139760961830687, - "df": 12, - "critical": 2.179, - "moe": 0.9020539135829065, - "p75": 42.321292000000085, - "p99": 43.44508300000007, - "p995": 43.44508300000007, - "p999": 43.44508300000007, - "sampleCount": 13, - "median": 40.792207999999846 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Split into single-page PDFs", - "benchmarks": [ - { - "id": "-1072438084_1_0", - "name": "split 100-page PDF (0.1MB)", - "rank": 1, - "rme": 1.9707618954593826, - "samples": [], - "totalTime": 519.5589599999998, - "min": 25.109875000000102, - "max": 29.522958000000017, - "hz": 36.569478081948596, - "period": 27.34520842105262, - "mean": 27.34520842105262, - "variance": 1.2500641058887851, - "sd": 1.11806265740735, - "sem": 0.25650116506237763, - "df": 18, - "critical": 2.101, - "moe": 0.5389089477960554, - "p75": 28.18224999999984, - "p99": 29.522958000000017, - "p995": 29.522958000000017, - "p999": 29.522958000000017, - "sampleCount": 19, - "median": 27.39729099999977 - 
}, - { - "id": "-1072438084_1_1", - "name": "split 2000-page PDF (0.9MB)", - "rank": 2, - "rme": 0, - "samples": [], - "totalTime": 506.605583, - "min": 506.605583, - "max": 506.605583, - "hz": 1.9739221863253724, - "period": 506.605583, - "mean": 506.605583, - "variance": 0, - "sd": 0, - "sem": 0, - "df": 0, - "critical": 12.71, - "moe": 0, - "p75": 506.605583, - "p99": 506.605583, - "p995": 506.605583, - "p999": 506.605583, - "sampleCount": 1, - "median": 506.605583 - } - ] - }, - { - "fullName": "benchmarks/splitting.bench.ts > Batch page extraction", - "benchmarks": [ - { - "id": "-1072438084_2_0", - "name": "extract first 10 pages from 2000-page PDF", - "rank": 1, - "rme": 2.7205867827906096, - "samples": [], - "totalTime": 518.4931240000024, - "min": 40.28308300000026, - "max": 45.694250000000466, - "hz": 23.143990623104088, - "period": 43.20776033333353, - "mean": 43.20776033333353, - "variance": 3.422865540761208, - "sd": 1.850098792162518, - "sem": 0.5340775178412157, - "df": 11, - "critical": 2.201, - "moe": 1.175504616768516, - "p75": 44.60529199999928, - "p99": 45.694250000000466, - "p995": 45.694250000000466, - "p999": 45.694250000000466, - "sampleCount": 12, - "median": 43.13712450000003 - }, - { - "id": "-1072438084_2_1", - "name": "extract first 100 pages from 2000-page PDF", - "rank": 2, - "rme": 6.90077055615189, - "samples": [], - "totalTime": 538.6501679999983, - "min": 48.26045899999917, - "max": 65.11799999999948, - "hz": 18.56492505540262, - "period": 53.86501679999983, - "mean": 53.86501679999983, - "variance": 27.003718600062566, - "sd": 5.196510232844978, - "sem": 1.6432808220162056, - "df": 9, - "critical": 2.262, - "moe": 3.717101219400657, - "p75": 54.15062500000022, - "p99": 65.11799999999948, - "p995": 65.11799999999948, - "p999": 65.11799999999948, - "sampleCount": 10, - "median": 53.544791499999974 - }, - { - "id": "-1072438084_2_2", - "name": "extract every 10th page from 2000-page PDF (200 pages)", - "rank": 3, - "rme": 
3.996294175601932, - "samples": [], - "totalTime": 509.82821200000035, - "min": 52.99541700000009, - "max": 62.211042000000816, - "hz": 17.65300504790424, - "period": 56.64757911111115, - "mean": 56.64757911111115, - "variance": 8.673642398208653, - "sd": 2.9451048195622263, - "sem": 0.9817016065207421, - "df": 8, - "critical": 2.306, - "moe": 2.2638039046368315, - "p75": 58.50329199999942, - "p99": 62.211042000000816, - "p995": 62.211042000000816, - "p999": 62.211042000000816, - "sampleCount": 9, - "median": 56.19641700000011 - } - ] - } - ] - } - ] -} diff --git a/reports/benchmarks.md b/reports/benchmarks.md index b0b0010..8a563e5 100644 --- a/reports/benchmarks.md +++ b/reports/benchmarks.md @@ -1,8 +1,8 @@ # Benchmark Report -> Generated on 2026-02-16 at 12:50:00 UTC +> Generated on 2026-02-16 at 21:19:11 UTC > -> System: darwin | Apple M4 Pro (12 cores) | 24GB RAM | Bun 1.3.5 +> System: linux | AMD EPYC 7763 64-Core Processor (4 cores) | 16GB RAM | Bun 1.3.9 --- @@ -22,91 +22,100 @@ | Benchmark | ops/sec | Mean | p99 | RME | Samples | | :-------- | ------: | ------: | ------: | -----: | ------: | -| libpdf | 895.0 | 1.12ms | 1.59ms | ±1.07% | 448 | -| pdf-lib | 36.7 | 27.21ms | 29.03ms | ±1.90% | 19 | +| libpdf | 362.6 | 2.76ms | 3.88ms | ±1.62% | 182 | +| pdf-lib | 25.0 | 39.95ms | 44.72ms | ±3.76% | 13 | -- **libpdf** is 24.35x faster than pdf-lib +- **libpdf** is 14.48x faster than pdf-lib ### Create blank PDF -| Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :-------- | ------: | ---: | ----: | -----: | ------: | -| libpdf | 38.6K | 26us | 45us | ±0.84% | 19,283 | -| pdf-lib | 10.3K | 97us | 461us | ±1.82% | 5,172 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ----: | -----: | -----: | ------: | +| libpdf | 10.7K | 94us | 184us | ±2.96% | 5,333 | +| pdf-lib | 2.3K | 437us | 1.65ms | ±2.65% | 1,143 | -- **libpdf** is 3.73x faster than pdf-lib +- **libpdf** is 4.67x faster than pdf-lib ### Add 10 pages -| Benchmark | 
ops/sec | Mean | p99 | RME | Samples | -| :-------- | ------: | ----: | ----: | -----: | ------: | -| libpdf | 19.1K | 52us | 87us | ±0.91% | 9,562 | -| pdf-lib | 6.3K | 158us | 770us | ±2.55% | 3,173 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ----: | -----: | -----: | ------: | +| libpdf | 5.7K | 174us | 653us | ±1.83% | 2,867 | +| pdf-lib | 1.9K | 540us | 2.13ms | ±3.10% | 926 | -- **libpdf** is 3.01x faster than pdf-lib +- **libpdf** is 3.10x faster than pdf-lib ### Draw 50 rectangles | Benchmark | ops/sec | Mean | p99 | RME | Samples | | :-------- | ------: | -----: | -----: | -----: | ------: | -| pdf-lib | 2.2K | 458us | 1.80ms | ±3.80% | 1,093 | -| libpdf | 627.4 | 1.59ms | 2.30ms | ±1.44% | 314 | +| pdf-lib | 474.2 | 2.11ms | 7.63ms | ±9.71% | 238 | +| libpdf | 156.7 | 6.38ms | 8.63ms | ±2.56% | 79 | -- **pdf-lib** is 3.48x faster than libpdf +- **pdf-lib** is 3.03x faster than libpdf ### Load and save PDF -| Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :-------- | ------: | ------: | ------: | -----: | ------: | -| libpdf | 909.3 | 1.10ms | 1.48ms | ±0.88% | 456 | -| pdf-lib | 22.0 | 45.45ms | 58.21ms | ±6.77% | 11 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | -------: | -----: | ------: | +| libpdf | 339.6 | 2.94ms | 5.18ms | ±3.06% | 171 | +| pdf-lib | 10.8 | 92.96ms | 109.93ms | ±5.74% | 10 | -- **libpdf** is 41.33x faster than pdf-lib +- **libpdf** is 31.57x faster than pdf-lib ### Load, modify, and save PDF | Benchmark | ops/sec | Mean | p99 | RME | Samples | | :-------- | ------: | ------: | ------: | -----: | ------: | -| libpdf | 37.9 | 26.38ms | 33.50ms | ±5.76% | 20 | -| pdf-lib | 23.1 | 43.25ms | 44.89ms | ±1.52% | 12 | +| libpdf | 13.5 | 73.86ms | 88.68ms | ±8.49% | 10 | +| pdf-lib | 11.5 | 87.10ms | 93.25ms | ±3.21% | 10 | -- **libpdf** is 1.64x faster than pdf-lib +- **libpdf** is 1.18x faster than pdf-lib ### Extract single page from 100-page PDF -| 
Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :-------- | ------: | -----: | -----: | -----: | ------: | -| libpdf | 503.4 | 1.99ms | 3.10ms | ±1.55% | 252 | -| pdf-lib | 155.9 | 6.41ms | 7.44ms | ±1.53% | 79 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | -----: | ------: | -----: | ------: | +| libpdf | 183.8 | 5.44ms | 8.10ms | ±2.45% | 92 | +| pdf-lib | 104.3 | 9.58ms | 11.64ms | ±1.78% | 53 | -- **libpdf** is 3.23x faster than pdf-lib +- **libpdf** is 1.76x faster than pdf-lib ### Split 100-page PDF into single-page PDFs -| Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :-------- | ------: | ------: | ------: | -----: | ------: | -| libpdf | 35.3 | 28.29ms | 29.79ms | ±1.35% | 18 | -| pdf-lib | 35.0 | 28.58ms | 32.30ms | ±3.07% | 18 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | -------: | -----: | ------: | +| pdf-lib | 11.1 | 90.04ms | 95.21ms | ±5.07% | 6 | +| libpdf | 10.7 | 93.65ms | 104.73ms | ±9.17% | 6 | -- **libpdf** is 1.01x faster than pdf-lib +- **pdf-lib** is 1.04x faster than libpdf + +### Split 2000-page PDF into single-page PDFs (0.9MB) + +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ----: | ----: | -----: | ------: | +| libpdf | 0.627 | 1.60s | 1.60s | ±0.00% | 1 | +| pdf-lib | 0.585 | 1.71s | 1.71s | ±0.00% | 1 | + +- **libpdf** is 1.07x faster than pdf-lib ### Copy 10 pages between documents -| Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :-------- | ------: | -----: | ------: | -----: | ------: | -| libpdf | 334.0 | 2.99ms | 3.55ms | ±1.12% | 168 | -| pdf-lib | 103.7 | 9.64ms | 14.95ms | ±3.70% | 52 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------- | ------: | ------: | ------: | -----: | ------: | +| libpdf | 103.7 | 9.64ms | 14.56ms | ±3.04% | 52 | +| pdf-lib | 78.9 | 12.67ms | 14.16ms | ±1.65% | 40 | -- **libpdf** is 3.22x faster than pdf-lib +- **libpdf** is 1.31x faster than pdf-lib 
### Merge 2 x 100-page PDFs | Benchmark | ops/sec | Mean | p99 | RME | Samples | | :-------- | ------: | ------: | ------: | -----: | ------: | -| libpdf | 47.1 | 21.23ms | 24.31ms | ±2.08% | 24 | -| pdf-lib | 22.9 | 43.64ms | 47.48ms | ±2.23% | 12 | +| pdf-lib | 17.9 | 55.88ms | 58.47ms | ±1.91% | 9 | +| libpdf | 12.6 | 79.05ms | 79.54ms | ±0.45% | 7 | -- **libpdf** is 2.06x faster than pdf-lib +- **pdf-lib** is 1.41x faster than libpdf ## Copying @@ -114,88 +123,88 @@ | Benchmark | ops/sec | Mean | p99 | RME | Samples | | :------------------------------ | ------: | ------: | ------: | -----: | ------: | -| copy 1 page | 2.3K | 429us | 735us | ±1.26% | 1,166 | -| copy 10 pages from 100-page PDF | 344.1 | 2.91ms | 3.57ms | ±1.12% | 173 | -| copy all 100 pages | 92.3 | 10.84ms | 13.86ms | ±1.96% | 47 | +| copy 1 page | 731.3 | 1.37ms | 2.63ms | ±3.01% | 366 | +| copy 10 pages from 100-page PDF | 113.0 | 8.85ms | 12.36ms | ±2.59% | 57 | +| copy all 100 pages | 25.7 | 38.97ms | 41.36ms | ±1.41% | 13 | -- **copy 1 page** is 6.78x faster than copy 10 pages from 100-page PDF -- **copy 1 page** is 25.27x faster than copy all 100 pages +- **copy 1 page** is 6.47x faster than copy 10 pages from 100-page PDF +- **copy 1 page** is 28.50x faster than copy all 100 pages ### Duplicate pages within same document -| Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :---------------------------------------- | ------: | ----: | ----: | -----: | ------: | -| duplicate all pages (double the document) | 2.2K | 461us | 798us | ±0.89% | 1,086 | -| duplicate page 0 | 2.2K | 464us | 758us | ±0.77% | 1,078 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------------------------------- | ------: | -----: | -----: | -----: | ------: | +| duplicate page 0 | 779.3 | 1.28ms | 2.49ms | ±2.00% | 390 | +| duplicate all pages (double the document) | 778.8 | 1.28ms | 2.48ms | ±2.14% | 390 | -- **duplicate all pages (double the document)** is 1.01x faster than duplicate page 0 
+- **duplicate page 0** is 1.00x faster than duplicate all pages (double the document) ### Merge PDFs | Benchmark | ops/sec | Mean | p99 | RME | Samples | | :---------------------- | ------: | ------: | ------: | -----: | ------: | -| merge 2 small PDFs | 1.4K | 712us | 1.12ms | ±1.00% | 702 | -| merge 10 small PDFs | 254.5 | 3.93ms | 5.73ms | ±2.08% | 128 | -| merge 2 x 100-page PDFs | 48.6 | 20.58ms | 26.32ms | ±3.33% | 25 | +| merge 2 small PDFs | 511.1 | 1.96ms | 3.22ms | ±1.91% | 256 | +| merge 10 small PDFs | 93.4 | 10.71ms | 14.59ms | ±2.26% | 47 | +| merge 2 x 100-page PDFs | 13.0 | 77.00ms | 82.43ms | ±3.35% | 7 | -- **merge 2 small PDFs** is 5.51x faster than merge 10 small PDFs -- **merge 2 small PDFs** is 28.89x faster than merge 2 x 100-page PDFs +- **merge 2 small PDFs** is 5.47x faster than merge 10 small PDFs +- **merge 2 small PDFs** is 39.35x faster than merge 2 x 100-page PDFs ## Drawing -| Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :---------------------------------- | ------: | -----: | -----: | -----: | ------: | -| draw 100 lines | 399.8 | 2.50ms | 2.74ms | ±0.51% | 200 | -| draw 100 rectangles | 360.3 | 2.78ms | 3.55ms | ±1.22% | 181 | -| draw 100 circles | 279.7 | 3.58ms | 4.42ms | ±1.33% | 140 | -| draw 100 text lines (standard font) | 259.5 | 3.85ms | 4.29ms | ±0.61% | 130 | -| create 10 pages with mixed content | 193.0 | 5.18ms | 6.40ms | ±1.35% | 97 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------------------------- | ------: | ------: | ------: | -----: | ------: | +| draw 100 lines | 91.8 | 10.89ms | 13.01ms | ±1.41% | 46 | +| draw 100 rectangles | 79.3 | 12.61ms | 16.38ms | ±3.74% | 40 | +| draw 100 circles | 69.0 | 14.50ms | 18.01ms | ±2.25% | 35 | +| draw 100 text lines (standard font) | 64.9 | 15.41ms | 20.87ms | ±3.54% | 33 | +| create 10 pages with mixed content | 46.8 | 21.39ms | 22.42ms | ±1.20% | 24 | -- **draw 100 lines** is 1.11x faster than draw 100 rectangles -- **draw 100 lines** is 
1.43x faster than draw 100 circles -- **draw 100 lines** is 1.54x faster than draw 100 text lines (standard font) -- **draw 100 lines** is 2.07x faster than create 10 pages with mixed content +- **draw 100 lines** is 1.16x faster than draw 100 rectangles +- **draw 100 lines** is 1.33x faster than draw 100 circles +- **draw 100 lines** is 1.41x faster than draw 100 text lines (standard font) +- **draw 100 lines** is 1.96x faster than create 10 pages with mixed content ## Forms -| Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :---------------- | ------: | -----: | -----: | -----: | ------: | -| read field values | 702.6 | 1.42ms | 1.85ms | ±0.97% | 352 | -| get form fields | 677.3 | 1.48ms | 2.36ms | ±1.45% | 339 | -| flatten form | 198.8 | 5.03ms | 5.84ms | ±1.28% | 100 | -| fill text fields | 155.1 | 6.45ms | 7.36ms | ±1.25% | 78 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :---------------- | ------: | ------: | ------: | -----: | ------: | +| read field values | 286.0 | 3.50ms | 6.12ms | ±2.24% | 143 | +| get form fields | 247.2 | 4.05ms | 9.66ms | ±5.08% | 124 | +| flatten form | 74.3 | 13.46ms | 17.19ms | ±2.79% | 38 | +| fill text fields | 55.3 | 18.07ms | 24.87ms | ±3.84% | 28 | -- **read field values** is 1.04x faster than get form fields -- **read field values** is 3.53x faster than flatten form -- **read field values** is 4.53x faster than fill text fields +- **read field values** is 1.16x faster than get form fields +- **read field values** is 3.85x faster than flatten form +- **read field values** is 5.17x faster than fill text fields ## Loading | Benchmark | ops/sec | Mean | p99 | RME | Samples | | :--------------------- | ------: | -----: | -----: | -----: | ------: | -| load small PDF (888B) | 38.7K | 26us | 37us | ±1.10% | 19,336 | -| load medium PDF (19KB) | 23.8K | 42us | 54us | ±0.84% | 11,904 | -| load form PDF (116KB) | 1.6K | 639us | 1.07ms | ±0.96% | 782 | -| load heavy PDF (9.9MB) | 909.7 | 1.10ms | 1.46ms | ±0.81% | 455 
| +| load small PDF (888B) | 14.7K | 68us | 139us | ±1.61% | 7,359 | +| load medium PDF (19KB) | 9.7K | 103us | 197us | ±1.32% | 4,852 | +| load form PDF (116KB) | 661.3 | 1.51ms | 2.71ms | ±2.06% | 331 | +| load heavy PDF (9.9MB) | 417.6 | 2.39ms | 3.61ms | ±1.55% | 209 | -- **load small PDF (888B)** is 1.62x faster than load medium PDF (19KB) -- **load small PDF (888B)** is 24.73x faster than load form PDF (116KB) -- **load small PDF (888B)** is 42.51x faster than load heavy PDF (9.9MB) +- **load small PDF (888B)** is 1.52x faster than load medium PDF (19KB) +- **load small PDF (888B)** is 22.25x faster than load form PDF (116KB) +- **load small PDF (888B)** is 35.24x faster than load heavy PDF (9.9MB) ## Saving | Benchmark | ops/sec | Mean | p99 | RME | Samples | | :--------------------------------- | ------: | -----: | -----: | -----: | ------: | -| save unmodified (19KB) | 22.2K | 45us | 63us | ±0.78% | 11,103 | -| incremental save (19KB) | 6.9K | 144us | 373us | ±1.14% | 3,461 | -| save with modifications (19KB) | 2.4K | 422us | 799us | ±0.97% | 1,185 | -| save heavy PDF (9.9MB) | 850.9 | 1.18ms | 1.58ms | ±0.91% | 426 | -| incremental save heavy PDF (9.9MB) | 494.7 | 2.02ms | 2.37ms | ±0.74% | 248 | +| save unmodified (19KB) | 7.9K | 126us | 321us | ±1.64% | 3,956 | +| incremental save (19KB) | 1.9K | 527us | 1.06ms | ±1.51% | 948 | +| save with modifications (19KB) | 762.8 | 1.31ms | 2.72ms | ±2.37% | 382 | +| save heavy PDF (9.9MB) | 413.5 | 2.42ms | 3.13ms | ±1.12% | 207 | +| incremental save heavy PDF (9.9MB) | 137.6 | 7.26ms | 7.58ms | ±0.44% | 69 | -- **save unmodified (19KB)** is 3.21x faster than incremental save (19KB) -- **save unmodified (19KB)** is 9.37x faster than save with modifications (19KB) -- **save unmodified (19KB)** is 26.10x faster than save heavy PDF (9.9MB) -- **save unmodified (19KB)** is 44.89x faster than incremental save heavy PDF (9.9MB) +- **save unmodified (19KB)** is 4.17x faster than incremental save (19KB) +- **save 
unmodified (19KB)** is 10.37x faster than save with modifications (19KB) +- **save unmodified (19KB)** is 19.13x faster than save heavy PDF (9.9MB) +- **save unmodified (19KB)** is 57.48x faster than incremental save heavy PDF (9.9MB) ## Splitting @@ -203,32 +212,32 @@ | Benchmark | ops/sec | Mean | p99 | RME | Samples | | :--------------------------------------- | ------: | ------: | ------: | -----: | ------: | -| extractPages (1 page from small PDF) | 2.2K | 452us | 931us | ±1.63% | 1,106 | -| extractPages (1 page from 100-page PDF) | 536.3 | 1.86ms | 3.04ms | ±1.54% | 269 | -| extractPages (1 page from 2000-page PDF) | 24.7 | 40.43ms | 42.46ms | ±2.46% | 13 | +| extractPages (1 page from small PDF) | 753.5 | 1.33ms | 2.47ms | ±2.83% | 377 | +| extractPages (1 page from 100-page PDF) | 197.6 | 5.06ms | 9.10ms | ±3.08% | 99 | +| extractPages (1 page from 2000-page PDF) | 13.1 | 76.08ms | 78.10ms | ±1.22% | 10 | -- **extractPages (1 page from small PDF)** is 4.12x faster than extractPages (1 page from 100-page PDF) -- **extractPages (1 page from small PDF)** is 89.36x faster than extractPages (1 page from 2000-page PDF) +- **extractPages (1 page from small PDF)** is 3.81x faster than extractPages (1 page from 100-page PDF) +- **extractPages (1 page from small PDF)** is 57.32x faster than extractPages (1 page from 2000-page PDF) ### Split into single-page PDFs -| Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :-------------------------- | ------: | -------: | -------: | -----: | ------: | -| split 100-page PDF (0.1MB) | 32.2 | 31.02ms | 35.44ms | ±2.72% | 17 | -| split 2000-page PDF (0.9MB) | 1.8 | 550.66ms | 550.66ms | ±0.00% | 1 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :-------------------------- | ------: | ------: | -------: | -----: | ------: | +| split 100-page PDF (0.1MB) | 10.9 | 91.83ms | 102.50ms | ±6.62% | 6 | +| split 2000-page PDF (0.9MB) | 0.650 | 1.54s | 1.54s | ±0.00% | 1 | -- **split 100-page PDF (0.1MB)** is 17.75x faster 
than split 2000-page PDF (0.9MB) +- **split 100-page PDF (0.1MB)** is 16.76x faster than split 2000-page PDF (0.9MB) ### Batch page extraction -| Benchmark | ops/sec | Mean | p99 | RME | Samples | -| :----------------------------------------------------- | ------: | ------: | ------: | -----: | ------: | -| extract first 10 pages from 2000-page PDF | 23.3 | 42.90ms | 46.80ms | ±3.59% | 12 | -| extract first 100 pages from 2000-page PDF | 20.1 | 49.79ms | 52.72ms | ±2.98% | 11 | -| extract every 10th page from 2000-page PDF (200 pages) | 18.6 | 53.74ms | 59.30ms | ±3.19% | 10 | +| Benchmark | ops/sec | Mean | p99 | RME | Samples | +| :----------------------------------------------------- | ------: | -------: | -------: | -----: | ------: | +| extract first 10 pages from 2000-page PDF | 12.2 | 81.95ms | 91.73ms | ±4.95% | 7 | +| extract first 100 pages from 2000-page PDF | 9.2 | 109.19ms | 110.20ms | ±1.20% | 5 | +| extract every 10th page from 2000-page PDF (200 pages) | 8.0 | 125.67ms | 127.43ms | ±1.82% | 4 | -- **extract first 10 pages from 2000-page PDF** is 1.16x faster than extract first 100 pages from 2000-page PDF -- **extract first 10 pages from 2000-page PDF** is 1.25x faster than extract every 10th page from 2000-page PDF (200 pages) +- **extract first 10 pages from 2000-page PDF** is 1.33x faster than extract first 100 pages from 2000-page PDF +- **extract first 10 pages from 2000-page PDF** is 1.53x faster than extract every 10th page from 2000-page PDF (200 pages) --- From d2b074190ef10ca0eac4868912d2a058cc8f393d Mon Sep 17 00:00:00 2001 From: Lucas Smith Date: Wed, 18 Feb 2026 14:03:33 +1100 Subject: [PATCH 7/9] ci: simplify PR benchmark workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just run benchmarks and post results as a PR comment. No base comparison — check manually if needed. 
--- .github/workflows/bench-pr.yml | 36 ++------ scripts/bench-comment.ts | 95 +++++++++++++++++++++ scripts/bench-compare.ts | 150 --------------------------------- 3 files changed, 103 insertions(+), 178 deletions(-) create mode 100644 scripts/bench-comment.ts delete mode 100644 scripts/bench-compare.ts diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index 9321de1..d82a1d7 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -17,46 +17,26 @@ permissions: jobs: bench: - name: Benchmark Comparison + name: Run Benchmarks runs-on: ubuntu-latest steps: - - name: Checkout PR + - name: Checkout uses: actions/checkout@v4 - with: - path: pr - - - name: Checkout base - uses: actions/checkout@v4 - with: - ref: ${{ github.event.pull_request.base.sha }} - path: base - name: Setup Bun uses: oven-sh/setup-bun@v2 - - name: Install dependencies (base) + - name: Install dependencies run: bun install --frozen-lockfile - working-directory: base - - - name: Install dependencies (PR) - run: bun install --frozen-lockfile - working-directory: pr - - - name: Run benchmarks (base) - run: bun run bench -- --outputJson ../base-results.json benchmarks/splitting.bench.ts - working-directory: base - continue-on-error: true - - name: Run benchmarks (PR) - run: bun run bench -- --outputJson ../pr-results.json benchmarks/splitting.bench.ts - working-directory: pr + - name: Run benchmarks + run: bun run bench -- --outputJson bench-results.json benchmarks/splitting.bench.ts - - name: Generate comparison - run: bun run pr/scripts/bench-compare.ts ../base-results.json ../pr-results.json bench-comment.md - working-directory: pr + - name: Generate comment + run: bun run scripts/bench-comment.ts bench-results.json bench-comment.md - name: Post or update PR comment uses: marocchino/sticky-pull-request-comment@v2 with: header: benchmark-results - path: pr/bench-comment.md + path: bench-comment.md diff --git a/scripts/bench-comment.ts 
b/scripts/bench-comment.ts new file mode 100644 index 0000000..8cc354d --- /dev/null +++ b/scripts/bench-comment.ts @@ -0,0 +1,95 @@ +/** + * Format benchmark JSON results as a markdown comment for PRs. + * + * Usage: + * bun run scripts/bench-comment.ts + */ + +import { readFileSync } from "node:fs"; + +interface Bench { + name: string; + mean: number; + hz: number; + p99: number; + rme: number; + sampleCount: number; +} + +interface Group { + fullName: string; + benchmarks: Bench[]; +} + +interface File { + filepath: string; + groups: Group[]; +} + +interface Output { + files: File[]; +} + +function formatMs(ms: number): string { + if (ms >= 1000) { + return `${(ms / 1000).toFixed(2)}s`; + } + + if (ms >= 1) { + return `${ms.toFixed(2)}ms`; + } + + return `${(ms * 1000).toFixed(0)}μs`; +} + +function formatRme(rme: number): string { + return `±${rme.toFixed(1)}%`; +} + +// ───────────────────────────────────────────────────────────────────────────── + +const [inputPath, outputPath] = process.argv.slice(2); + +if (!inputPath || !outputPath) { + console.error("Usage: bun run scripts/bench-comment.ts "); + process.exit(1); +} + +const data: Output = JSON.parse(readFileSync(inputPath, "utf-8")); + +const lines: string[] = []; +lines.push("## Benchmark Results"); +lines.push(""); + +for (const file of data.files) { + for (const group of file.groups) { + const groupName = group.fullName.includes(" > ") + ? group.fullName.split(" > ").slice(1).join(" > ") + : group.fullName; + + lines.push(`**${groupName}**`); + lines.push(""); + lines.push("| Benchmark | Mean | p99 | RME | Samples |"); + lines.push("|:---|---:|---:|---:|---:|"); + + for (const b of group.benchmarks) { + lines.push( + `| ${b.name} | ${formatMs(b.mean)} | ${formatMs(b.p99)} | ${formatRme(b.rme)} | ${b.sampleCount} |`, + ); + } + + lines.push(""); + } +} + +lines.push( + `
<details>\n<summary>Environment</summary>\n\n` + + `- Runner: \`ubuntu-latest\`\n` + + `- Runtime: Bun ${process.versions.bun}\n\n` + + `*Results are machine-dependent.*\n` + + `</details>
`, +); + +const body = lines.join("\n"); +await Bun.write(outputPath, body); +console.log(body); diff --git a/scripts/bench-compare.ts b/scripts/bench-compare.ts deleted file mode 100644 index 2be007e..0000000 --- a/scripts/bench-compare.ts +++ /dev/null @@ -1,150 +0,0 @@ -/** - * Compare two benchmark JSON files and produce a markdown summary. - * - * Usage: - * bun run scripts/bench-compare.ts - * - * If does not exist, outputs PR-only results. - */ - -import { existsSync, readFileSync } from "node:fs"; - -interface Bench { - name: string; - mean: number; - hz: number; - p99: number; - rme: number; - sampleCount: number; -} - -interface Group { - fullName: string; - benchmarks: Bench[]; -} - -interface File { - filepath: string; - groups: Group[]; -} - -interface Output { - files: File[]; -} - -function buildMap(data: Output): Map { - const map = new Map(); - - for (const file of data.files) { - for (const group of file.groups) { - for (const b of group.benchmarks) { - map.set(b.name, b); - } - } - } - - return map; -} - -function formatMs(ms: number): string { - if (ms >= 1000) { - return `${(ms / 1000).toFixed(2)}s`; - } - - if (ms >= 1) { - return `${ms.toFixed(2)}ms`; - } - - return `${(ms * 1000).toFixed(0)}μs`; -} - -// ───────────────────────────────────────────────────────────────────────────── - -const [basePath, prPath, outputPath] = process.argv.slice(2); - -if (!prPath || !outputPath) { - console.error("Usage: bun run scripts/bench-compare.ts "); - process.exit(1); -} - -const pr: Output = JSON.parse(readFileSync(prPath, "utf-8")); -const prMap = buildMap(pr); -const hasBase = existsSync(basePath); - -const lines: string[] = []; -lines.push("## Benchmark Results"); -lines.push(""); - -if (hasBase) { - const base: Output = JSON.parse(readFileSync(basePath, "utf-8")); - const baseMap = buildMap(base); - - lines.push("| Benchmark | Base | PR | Change |"); - lines.push("|:---|---:|---:|---:|"); - - let hasRegression = false; - - for (const [name, b] of 
baseMap) { - const p = prMap.get(name); - - if (!p) { - continue; - } - - const ratio = b.mean / p.mean; - let change: string; - let indicator = ""; - - if (ratio > 1.05) { - change = `${ratio.toFixed(2)}x faster`; - indicator = " 🟢"; - } else if (ratio < 0.95) { - change = `${(1 / ratio).toFixed(2)}x slower`; - indicator = " 🔴"; - hasRegression = true; - } else { - change = "~same"; - } - - lines.push(`| ${name} | ${formatMs(b.mean)} | ${formatMs(p.mean)} | ${change}${indicator} |`); - } - - // Show benchmarks only in PR (new benchmarks) - for (const [name, p] of prMap) { - if (!baseMap.has(name)) { - lines.push(`| ${name} | — | ${formatMs(p.mean)} | *new* |`); - } - } - - lines.push(""); - - if (hasRegression) { - lines.push("> ⚠️ **Performance regression detected.** Please review the changes above."); - } else { - lines.push("> ✅ No performance regressions detected."); - } -} else { - // No base results — just show PR numbers - lines.push("*No base benchmarks available for comparison (new benchmark suite?).*"); - lines.push(""); - lines.push("| Benchmark | Mean | p99 | Samples |"); - lines.push("|:---|---:|---:|---:|"); - - for (const [name, p] of prMap) { - lines.push(`| ${name} | ${formatMs(p.mean)} | ${formatMs(p.p99)} | ${p.sampleCount} |`); - } -} - -lines.push(""); -lines.push( - `
<details>\n<summary>Environment</summary>\n\n` + - `- Runner: \`ubuntu-latest\`\n` + - `- Runtime: Bun ${process.versions.bun}\n` + - `- Benchmark: \`benchmarks/splitting.bench.ts\`\n\n` + - `*Results are machine-dependent. Thresholds: >5% faster 🟢, >5% slower 🔴.*\n` + - `</details>
`, -); - -const body = lines.join("\n"); -await Bun.write(outputPath, body); -console.log(body); From cbf5f98efbcc66355707d34e1d9508ff3b1ab57b Mon Sep 17 00:00:00 2001 From: Lucas Smith Date: Wed, 18 Feb 2026 14:06:24 +1100 Subject: [PATCH 8/9] ci: run all benchmarks, use collapsible sections in PR comment --- .github/workflows/bench-pr.yml | 2 +- scripts/bench-comment.ts | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index d82a1d7..d0288d7 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -30,7 +30,7 @@ jobs: run: bun install --frozen-lockfile - name: Run benchmarks - run: bun run bench -- --outputJson bench-results.json benchmarks/splitting.bench.ts + run: bun run bench -- --outputJson bench-results.json - name: Generate comment run: bun run scripts/bench-comment.ts bench-results.json bench-comment.md diff --git a/scripts/bench-comment.ts b/scripts/bench-comment.ts index 8cc354d..3dd103f 100644 --- a/scripts/bench-comment.ts +++ b/scripts/bench-comment.ts @@ -1,6 +1,8 @@ /** * Format benchmark JSON results as a markdown comment for PRs. * + * Each benchmark file gets its own collapsible section. + * * Usage: * bun run scripts/bench-comment.ts */ @@ -46,6 +48,18 @@ function formatRme(rme: number): string { return `±${rme.toFixed(1)}%`; } +function fileLabel(filepath: string): string { + const match = filepath.match(/([^/]+)\.bench\.ts$/); + + if (!match) { + return filepath; + } + + const name = match[1]; + + return name.charAt(0).toUpperCase() + name.slice(1); +} + // ───────────────────────────────────────────────────────────────────────────── const [inputPath, outputPath] = process.argv.slice(2); @@ -62,6 +76,12 @@ lines.push("## Benchmark Results"); lines.push(""); for (const file of data.files) { + const label = fileLabel(file.filepath); + + lines.push(`
<details>`); + lines.push(`<summary>${label}</summary>`); + lines.push(""); + for (const group of file.groups) { const groupName = group.fullName.includes(" > ") ? group.fullName.split(" > ").slice(1).join(" > ") @@ -80,6 +100,9 @@ for (const file of data.files) { lines.push(""); } + + lines.push(`</details>
`); + lines.push(""); } lines.push( From 44141fc58649cd79b8e1383135678de0b679b15c Mon Sep 17 00:00:00 2001 From: Lucas Smith Date: Wed, 18 Feb 2026 14:07:00 +1100 Subject: [PATCH 9/9] ci: use actual runner info instead of hardcoded string --- .github/workflows/bench-pr.yml | 2 ++ scripts/bench-comment.ts | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml index d0288d7..cd3e0f0 100644 --- a/.github/workflows/bench-pr.yml +++ b/.github/workflows/bench-pr.yml @@ -34,6 +34,8 @@ jobs: - name: Generate comment run: bun run scripts/bench-comment.ts bench-results.json bench-comment.md + env: + BENCH_RUNNER: ${{ runner.os }} (${{ runner.arch }}) - name: Post or update PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/scripts/bench-comment.ts b/scripts/bench-comment.ts index 3dd103f..b6df46b 100644 --- a/scripts/bench-comment.ts +++ b/scripts/bench-comment.ts @@ -105,9 +105,11 @@ for (const file of data.files) { lines.push(""); } +const runner = process.env.BENCH_RUNNER ?? "local"; + lines.push( `
<details>\n<summary>Environment</summary>\n\n` + - `- Runner: \`ubuntu-latest\`\n` + + `- Runner: \`${runner}\`\n` + `- Runtime: Bun ${process.versions.bun}\n\n` + `*Results are machine-dependent.*\n` + `</details>
`,