diff --git a/.github/workflows/bench-pr.yml b/.github/workflows/bench-pr.yml new file mode 100644 index 0000000..cd3e0f0 --- /dev/null +++ b/.github/workflows/bench-pr.yml @@ -0,0 +1,44 @@ +name: PR Benchmarks + +on: + pull_request: + branches: [main] + paths: + - "src/**/*.ts" + - "benchmarks/**/*.ts" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: write + +jobs: + bench: + name: Run Benchmarks + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Bun + uses: oven-sh/setup-bun@v2 + + - name: Install dependencies + run: bun install --frozen-lockfile + + - name: Run benchmarks + run: bun run bench -- --outputJson bench-results.json + + - name: Generate comment + run: bun run scripts/bench-comment.ts bench-results.json bench-comment.md + env: + BENCH_RUNNER: ${{ runner.os }} (${{ runner.arch }}) + + - name: Post or update PR comment + uses: marocchino/sticky-pull-request-comment@v2 + with: + header: benchmark-results + path: bench-comment.md diff --git a/bun.lock b/bun.lock index 2d34aa2..23cceae 100644 --- a/bun.lock +++ b/bun.lock @@ -8,6 +8,7 @@ "@noble/ciphers": "^2.1.1", "@noble/hashes": "^2.0.1", "@scure/base": "^2.0.0", + "lru-cache": "^11.2.6", "pako": "^2.1.0", "pkijs": "^3.3.3", }, @@ -492,7 +493,7 @@ "long": ["long@5.3.2", "", {}, "sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA=="], - "lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], + "lru-cache": ["lru-cache@11.2.6", "", {}, "sha512-ESL2CrkS/2wTPfuend7Zhkzo2u0daGJ/A2VucJOgQ/C48S/zB8MMeMHSGKYpXhIjbPxfuezITkaBH1wqv00DDQ=="], "magic-string": ["magic-string@0.30.21", "", { "dependencies": { "@jridgewell/sourcemap-codec": "^1.5.5" } }, "sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ=="], @@ -706,6 +707,8 @@ "micromatch/picomatch": ["picomatch@2.3.1", "", {}, "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA=="], + "path-scurry/lru-cache": ["lru-cache@10.4.3", "", {}, "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="], + "pdf-lib/pako": ["pako@1.0.11", "", {}, "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw=="], "pkijs/@noble/hashes": ["@noble/hashes@1.4.0", "", {}, "sha512-V1JJ1WTRUqHHrOSh597hURcMqVKVGL/ea3kv0gSnEdsEZ0/+VyPghM1lMNGc00z7CIQorSvbKpuJkxvuHbvdbg=="], diff --git a/package.json b/package.json index 230f00b..0005466 100644 --- a/package.json +++ b/package.json @@ -67,6 +67,7 @@ "@noble/ciphers": "^2.1.1", "@noble/hashes": "^2.0.1", "@scure/base": "^2.0.0", + "lru-cache": "^11.2.6", "pako": "^2.1.0", "pkijs": "^3.3.3" }, diff --git a/scripts/bench-comment.ts b/scripts/bench-comment.ts new file mode 100644 index 0000000..b6df46b --- /dev/null +++ b/scripts/bench-comment.ts @@ -0,0 +1,120 @@ +/** + * Format benchmark JSON results as a markdown comment for PRs. + * + * Each benchmark file gets its own collapsible section. 
+ *
+ * Usage:
+ *   bun run scripts/bench-comment.ts <input.json> <output.md>
+ */
+
+import { readFileSync } from "node:fs";
+
+interface Bench {
+  name: string;
+  mean: number;
+  hz: number;
+  p99: number;
+  rme: number;
+  sampleCount: number;
+}
+
+interface Group {
+  fullName: string;
+  benchmarks: Bench[];
+}
+
+interface File {
+  filepath: string;
+  groups: Group[];
+}
+
+interface Output {
+  files: File[];
+}
+
+function formatMs(ms: number): string {
+  if (ms >= 1000) {
+    return `${(ms / 1000).toFixed(2)}s`;
+  }
+
+  if (ms >= 1) {
+    return `${ms.toFixed(2)}ms`;
+  }
+
+  return `${(ms * 1000).toFixed(0)}μs`;
+}
+
+function formatRme(rme: number): string {
+  return `±${rme.toFixed(1)}%`;
+}
+
+function fileLabel(filepath: string): string {
+  const match = filepath.match(/([^/]+)\.bench\.ts$/);
+
+  if (!match) {
+    return filepath;
+  }
+
+  const name = match[1];
+
+  return name.charAt(0).toUpperCase() + name.slice(1);
+}
+
+// ─────────────────────────────────────────────────────────────────────────────
+
+const [inputPath, outputPath] = process.argv.slice(2);
+
+if (!inputPath || !outputPath) {
+  console.error("Usage: bun run scripts/bench-comment.ts <input.json> <output.md>");
+  process.exit(1);
+}
+
+const data: Output = JSON.parse(readFileSync(inputPath, "utf-8"));
+
+const lines: string[] = [];
+lines.push("## Benchmark Results");
+lines.push("");
+
+for (const file of data.files) {
+  const label = fileLabel(file.filepath);
+
+  lines.push(`<details>`);
+  lines.push(`<summary>${label}</summary>`);
+  lines.push("");
+
+  for (const group of file.groups) {
+    const groupName = group.fullName.includes(" > ")
+      ? group.fullName.split(" > ").slice(1).join(" > ")
+      : group.fullName;
+
+    lines.push(`**${groupName}**`);
+    lines.push("");
+    lines.push("| Benchmark | Mean | p99 | RME | Samples |");
+    lines.push("|:---|---:|---:|---:|---:|");
+
+    for (const b of group.benchmarks) {
+      lines.push(
+        `| ${b.name} | ${formatMs(b.mean)} | ${formatMs(b.p99)} | ${formatRme(b.rme)} | ${b.sampleCount} |`,
+      );
+    }
+
+    lines.push("");
+  }
+
+  lines.push(`</details>`);
+  lines.push("");
+}
+
+const runner = process.env.BENCH_RUNNER ?? "local";
+
+lines.push(
+  `<details>\n<summary>Environment</summary>\n\n` +
+    `- Runner: \`${runner}\`\n` +
+    `- Runtime: Bun ${process.versions.bun}\n\n` +
+    `*Results are machine-dependent.*\n` +
+    `</details>`,
+);
+
+const body = lines.join("\n");
+await Bun.write(outputPath, body);
+console.log(body);
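For reference, here is a minimal input matching the `Output`/`File`/`Group`/`Bench` interfaces above. The field values are hypothetical; the real file is whatever `bun run bench -- --outputJson` emits:

```typescript
// Hypothetical bench-results.json content, shown as a typed literal.
// Times are in milliseconds, matching formatMs()'s assumption.
const example: Output = {
  files: [
    {
      filepath: "benchmarks/parser.bench.ts",
      groups: [
        {
          fullName: "parser.bench.ts > parse xref table",
          benchmarks: [
            { name: "10k objects", mean: 1.84, hz: 543.5, p99: 2.91, rme: 1.2, sampleCount: 272 },
          ],
        },
      ],
    },
  ],
};
```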
diff --git a/src/api/pdf.ts b/src/api/pdf.ts
index 1ac4296..b2198ee 100644
--- a/src/api/pdf.ts
+++ b/src/api/pdf.ts
@@ -125,6 +125,26 @@ export interface SaveOptions {
    * @default false
    */
   subsetFonts?: boolean;
+
+  /**
+   * Compress uncompressed streams with FlateDecode (default: true).
+   *
+   * When enabled, streams without a /Filter entry will be compressed
+   * before writing. Streams that already have filters (including image
+   * formats like DCTDecode/JPXDecode) are left unchanged.
+   */
+  compressStreams?: boolean;
+
+  /**
+   * Minimum stream size in bytes to attempt compression (default: 512).
+   *
+   * Streams smaller than this threshold are written uncompressed.
+   * Deflate initialization has a fixed overhead that dominates for small
+   * payloads, and tiny streams rarely achieve meaningful compression.
+   *
+   * Set to 0 to compress all streams regardless of size.
+   */
+  compressionThreshold?: number;
 }
 
 /**
@@ -1698,6 +1718,7 @@ export class PDF {
    * const [duplicate] = await pdf.copyPagesFrom(pdf, [0], { insertAt: 1 });
    * ```
    */
+  // oxlint-disable-next-line typescript/require-await -- Public async API kept for backward compat; ObjectCopier is sync.
   async copyPagesFrom(
     source: PDF,
     indices: number[],
@@ -1729,7 +1750,7 @@
         throw new Error(`Source page ${index} not found`);
       }
 
-      const copiedPageRef = await copier.copyPage(srcPage.ref);
+      const copiedPageRef = copier.copyPage(srcPage.ref);
       copiedRefs.push(copiedPageRef);
     }
 
@@ -1824,6 +1845,7 @@ export class PDF {
    * }
    * ```
    */
+  // oxlint-disable-next-line typescript/require-await -- Public async API kept for backward compat; ObjectCopier is sync.
   async embedPage(source: PDF, pageIndex: number): Promise {
     const srcPage = source.getPage(pageIndex);
 
@@ -1842,7 +1864,7 @@ export class PDF {
     let resources: PdfDict;
 
     if (srcResources) {
-      const copied = await copier.copyObject(srcResources);
+      const copied = copier.copyObject(srcResources);
 
       // This is guaranteed by our checks above
       resources = copied instanceof PdfDict ? 
copied : new PdfDict(); @@ -3139,6 +3161,8 @@ export class PDF { id: fileId, useXRefStream, securityHandler, + compressStreams: options.compressStreams, + compressionThreshold: options.compressionThreshold, }); // Reset pending security state after successful save @@ -3156,6 +3180,8 @@ export class PDF { id: fileId, useXRefStream, securityHandler, + compressStreams: options.compressStreams, + compressionThreshold: options.compressionThreshold, }); // Reset pending security state after successful save diff --git a/src/document/object-copier.test.ts b/src/document/object-copier.test.ts index fae721f..b62deab 100644 --- a/src/document/object-copier.test.ts +++ b/src/document/object-copier.test.ts @@ -23,7 +23,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); // The copied page should be registered in dest expect(copiedPageRef).toBeInstanceOf(PdfRef); @@ -47,7 +47,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; @@ -73,7 +73,7 @@ describe("ObjectCopier", () => { expect(srcPage.has("Parent")).toBe(true); const copier = new ObjectCopier(source, dest); - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; expect(copiedPage.has("Parent")).toBe(false); @@ -92,7 +92,7 @@ describe("ObjectCopier", () => { expect(srcPage.has("Annots")).toBe(true); const copier = new ObjectCopier(source, dest, { includeAnnotations: false }); - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; expect(copiedPage.has("Annots")).toBe(false); @@ -111,7 +111,7 @@ describe("ObjectCopier", () => { expect(srcPage.has("Annots")).toBe(true); const copier = new ObjectCopier(source, dest); - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; expect(copiedPage.has("Annots")).toBe(true); @@ -138,7 +138,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const fakeRef = PdfRef.of(99999, 0); - await expect(copier.copyPage(fakeRef)).rejects.toThrow(/not found/); + expect(() => copier.copyPage(fakeRef)).toThrow(/not found/); }); it("copies page Resources", async () => { @@ -150,7 +150,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; @@ -181,9 +181,9 @@ describe("ObjectCopier", () => { const str = PdfString.fromString("hello"); // Primitives should be returned as-is (they're immutable) - expect(await copier.copyObject(name)).toBe(name); - expect(await copier.copyObject(num)).toBe(num); - expect(await copier.copyObject(str)).toBe(str); + expect(copier.copyObject(name)).toBe(name); + expect(copier.copyObject(num)).toBe(num); + expect(copier.copyObject(str)).toBe(str); }); 
it("creates new instance for arrays", async () => { @@ -195,7 +195,7 @@ describe("ObjectCopier", () => { const arr = new PdfArray([PdfNumber.of(1), PdfNumber.of(2), PdfName.of("Test")]); - const copied = await copier.copyObject(arr); + const copied = copier.copyObject(arr); expect(copied).toBeInstanceOf(PdfArray); expect(copied).not.toBe(arr); expect(copied.length).toBe(3); @@ -218,7 +218,7 @@ describe("ObjectCopier", () => { Key2: PdfString.fromString("value"), }); - const copied = await copier.copyObject(dict); + const copied = copier.copyObject(dict); expect(copied).toBeInstanceOf(PdfDict); expect(copied).not.toBe(dict); @@ -241,7 +241,7 @@ describe("ObjectCopier", () => { Array: innerArr, }); - const copied = await copier.copyObject(outerDict); + const copied = copier.copyObject(outerDict); // Outer should be new expect(copied).not.toBe(outerDict); @@ -269,7 +269,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); // Refs should be different expect(copiedPageRef.objectNumber).not.toBe(srcPageRef.objectNumber); @@ -290,8 +290,8 @@ describe("ObjectCopier", () => { // Copy same page twice - internal resources should be deduplicated const srcPageRef = source.getPage(0)!.ref; - const copied1 = await copier.copyPage(srcPageRef); - const copied2 = await copier.copyPage(srcPageRef); + const copied1 = copier.copyPage(srcPageRef); + const copied2 = copier.copyPage(srcPageRef); // Page refs are different (each copyPage registers a new page) expect(copied1.objectNumber).not.toBe(copied2.objectNumber); @@ -321,7 +321,7 @@ describe("ObjectCopier", () => { const srcPageRef = source.getPage(0)!.ref; // This should not throw due to circular references - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); expect(copiedPageRef).toBeInstanceOf(PdfRef); // Page should be valid @@ -342,7 +342,7 @@ describe("ObjectCopier", () => { const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; @@ -365,7 +365,7 @@ describe("ObjectCopier", () => { const srcPageRef = source.getPage(0)!.ref; // Should not throw - streams are decoded and re-encoded - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); const copiedPage = dest.getObject(copiedPageRef) as PdfDict; expect(copiedPage.getName("Type")?.value).toBe("Page"); @@ -381,7 +381,7 @@ describe("ObjectCopier", () => { const originalPageRef = pdf.getPage(0)!.ref; const copier = new ObjectCopier(pdf, pdf); - const duplicatedRef = await copier.copyPage(originalPageRef); + const duplicatedRef = copier.copyPage(originalPageRef); // Refs should be different expect(duplicatedRef.objectNumber).not.toBe(originalPageRef.objectNumber); @@ -411,7 +411,7 @@ describe("ObjectCopier", () => { // Copy a page const copier = new ObjectCopier(source, dest); const srcPageRef = source.getPage(0)!.ref; - const copiedPageRef = await copier.copyPage(srcPageRef); + const copiedPageRef = copier.copyPage(srcPageRef); dest.insertPage(destOriginalCount, copiedPageRef); // Save @@ -444,7 +444,7 @@ describe("ObjectCopier", () => { for (let i = 0; i < source.getPageCount(); i++) { const copier = new 
ObjectCopier(source, dest);
       const srcPageRef = source.getPage(i)!.ref;
-      const copiedPageRef = await copier.copyPage(srcPageRef);
+      const copiedPageRef = copier.copyPage(srcPageRef);
       dest.insertPage(dest.getPageCount(), copiedPageRef);
     }
 
@@ -468,7 +468,7 @@ describe("ObjectCopier", () => {
     for (let i = 0; i < originalCount; i++) {
       const copier = new ObjectCopier(pdf, pdf);
       const srcPageRef = pdf.getPage(i)!.ref;
-      const copiedPageRef = await copier.copyPage(srcPageRef);
+      const copiedPageRef = copier.copyPage(srcPageRef);
       // Insert duplicate after the original
       pdf.insertPage(i * 2 + 1, copiedPageRef);
     }
@@ -493,7 +493,7 @@ describe("ObjectCopier", () => {
     // Copy form page with annotations
     const copier = new ObjectCopier(source, dest, { includeAnnotations: true });
     const srcPageRef = source.getPage(0)!.ref;
-    const copiedPageRef = await copier.copyPage(srcPageRef);
+    const copiedPageRef = copier.copyPage(srcPageRef);
     dest.insertPage(dest.getPageCount(), copiedPageRef);
 
     const savedBytes = await dest.save();
@@ -516,7 +516,7 @@ describe("ObjectCopier", () => {
     // Copy from encrypted source
     const copier = new ObjectCopier(source, dest);
     const srcPageRef = source.getPage(0)!.ref;
-    const copiedPageRef = await copier.copyPage(srcPageRef);
+    const copiedPageRef = copier.copyPage(srcPageRef);
     dest.insertPage(dest.getPageCount(), copiedPageRef);
 
     const savedBytes = await dest.save();
diff --git a/src/document/object-copier.ts b/src/document/object-copier.ts
index 78c8c15..7e5d719 100644
--- a/src/document/object-copier.ts
+++ b/src/document/object-copier.ts
@@ -10,6 +10,8 @@
  * - Flattens inherited page attributes during page copy
  * - Smart stream handling: raw bytes if unencrypted, re-encode if encrypted
  * - Circular reference detection
+ *
+ * All operations are synchronous — there is no I/O involved in object copying.
  */
 
 import type { PDF } from "#src/api/pdf";
@@ -46,7 +48,7 @@ const INHERITABLE_PAGE_ATTRS = ["Resources", "MediaBox", "CropBox", "Rotate"] as
  * @example
  * ```typescript
  * const copier = new ObjectCopier(sourcePdf, destPdf);
- * const copiedPageRef = await copier.copyPage(sourcePageRef);
+ * const copiedPageRef = copier.copyPage(sourcePageRef);
  * destPdf.insertPage(0, copiedPageRef);
  * ```
  */
 export class ObjectCopier {
@@ -79,7 +81,7 @@
    * @param srcPageRef Reference to the page in source document
    * @returns Reference to the copied page in destination document
    */
-  async copyPage(srcPageRef: PdfRef): Promise<PdfRef> {
+  copyPage(srcPageRef: PdfRef): PdfRef {
     const srcPage = this.source.getObject(srcPageRef);
 
     if (!(srcPage instanceof PdfDict)) {
@@ -98,7 +100,7 @@
 
       if (inherited) {
         // Deep copy the inherited value
-        const copied = await this.copyObject(inherited);
+        const copied = this.copyObject(inherited);
         cloned.set(key, copied);
       }
     }
@@ -125,7 +127,7 @@
     cloned.delete("Parent");
 
     // Deep copy all values in the cloned dict, remapping refs
-    const copiedPage = await this.copyDictValues(cloned);
+    const copiedPage = this.copyDictValues(cloned);
 
     // Register in destination and return ref
     return this.dest.register(copiedPage);
@@ -134,25 +136,25 @@
   /**
    * Deep copy any PDF object, remapping references to destination.
   */
-  async copyObject<T extends PdfObject>(obj: T): Promise<T> {
+  copyObject<T extends PdfObject>(obj: T): T {
     if (obj instanceof PdfRef) {
       // oxlint-disable-next-line typescript/no-unsafe-type-assertion
-      return (await this.copyRef(obj)) as unknown as T;
+      return this.copyRef(obj) as unknown as T;
     }
 
     if (obj instanceof PdfStream) {
       // oxlint-disable-next-line typescript/no-unsafe-type-assertion
-      return (await this.copyStream(obj)) as unknown as T;
+      return this.copyStream(obj) as unknown as T;
     }
 
     if (obj instanceof PdfDict) {
       // oxlint-disable-next-line typescript/no-unsafe-type-assertion
-      return (await this.copyDict(obj)) as unknown as T;
+      return this.copyDict(obj) as unknown as T;
     }
 
     if (obj instanceof PdfArray) {
       // oxlint-disable-next-line typescript/no-unsafe-type-assertion
-      return (await this.copyArray(obj)) as unknown as T;
+      return this.copyArray(obj) as unknown as T;
     }
 
     // Primitives (PdfName, PdfNumber, PdfString, PdfBool, PdfNull)
@@ -166,7 +168,7 @@
    * Handles circular references by registering a placeholder before
    * recursively copying the referenced object's contents.
    */
-  private async copyRef(ref: PdfRef): Promise<PdfRef> {
+  private copyRef(ref: PdfRef): PdfRef {
     const key = `${ref.objectNumber}:${ref.generation}`;
 
     // Already copied (or being copied)?
@@ -206,7 +208,7 @@
     const items: PdfObject[] = [];
 
     for (const item of srcObj) {
-      items.push(await this.copyObject(item));
+      items.push(this.copyObject(item));
     }
 
     const copiedArr = new PdfArray(items);
@@ -226,7 +228,7 @@
   /**
    * Copy a dict reference, handling circular references.
    */
-  private async copyDictRef(key: string, srcDict: PdfDict): Promise<PdfRef> {
+  private copyDictRef(key: string, srcDict: PdfDict): PdfRef {
     // Clone the dict shell first
     const cloned = srcDict.clone();
 
@@ -235,7 +237,7 @@
     this.refMap.set(key, destRef);
 
     // Now copy all values (which may reference back to us)
-    await this.copyDictValues(cloned);
+    this.copyDictValues(cloned);
 
     return destRef;
   }
@@ -243,7 +245,7 @@
   /**
    * Copy a stream reference, handling circular references and encryption.
    */
-  private async copyStreamRef(key: string, srcStream: PdfStream): Promise<PdfRef> {
+  private copyStreamRef(key: string, srcStream: PdfStream): PdfRef {
     const sourceWasEncrypted = this.source.isEncrypted;
 
     // Clone the stream's dictionary
@@ -310,7 +312,7 @@
     // Now copy dict values (which may reference back to us)
     // Note: we modify the already-registered stream's dict entries
     for (const [entryKey, value] of clonedDict) {
-      const copied = await this.copyObject(value);
+      const copied = this.copyObject(value);
       copiedStream.set(entryKey.value, copied);
     }
 
@@ -321,7 +323,7 @@
   /**
    * Copy a dictionary, remapping all reference values.
    */
-  private async copyDict(dict: PdfDict): Promise<PdfDict> {
+  private copyDict(dict: PdfDict): PdfDict {
     const cloned = dict.clone();
 
     return this.copyDictValues(cloned);
@@ -331,9 +333,9 @@
    * Copy all values in a dictionary, remapping references.
    * Modifies the dict in place and returns it.
    */
-  private async copyDictValues(dict: PdfDict): Promise<PdfDict> {
+  private copyDictValues(dict: PdfDict): PdfDict {
     for (const [key, value] of dict) {
-      const copied = await this.copyObject(value);
+      const copied = this.copyObject(value);
       dict.set(key.value, copied);
     }
 
@@ -344,11 +346,11 @@
   /**
    * Copy an array, remapping all reference elements.
   */
-  private async copyArray(arr: PdfArray): Promise<PdfArray> {
+  private copyArray(arr: PdfArray): PdfArray {
     const items: PdfObject[] = [];
 
     for (const item of arr) {
-      items.push(await this.copyObject(item));
+      items.push(this.copyObject(item));
     }
 
     return new PdfArray(items);
@@ -360,14 +362,14 @@
    * If source wasn't encrypted, copies raw encoded bytes (fastest).
    * If source was encrypted, decodes and re-encodes with same filters.
    */
-  private async copyStream(stream: PdfStream): Promise<PdfStream> {
+  private copyStream(stream: PdfStream): PdfStream {
     const sourceWasEncrypted = this.source.isEncrypted;
 
     // Clone the stream's dictionary
     const clonedDict = stream.clone();
 
     // Copy dict values (remapping refs, but not stream data yet)
-    await this.copyDictValues(clonedDict);
+    this.copyDictValues(clonedDict);
 
     if (!sourceWasEncrypted) {
       // Source wasn't encrypted - copy raw encoded bytes directly
diff --git a/src/helpers/buffer.ts b/src/helpers/buffer.ts
index 2aab629..d5bdded 100644
--- a/src/helpers/buffer.ts
+++ b/src/helpers/buffer.ts
@@ -43,6 +43,13 @@ export function concatBytes(arrays: Uint8Array[]): Uint8Array {
   return result;
 }
 
+/** Pre-computed hex lookup: byte value → "XX" uppercase string. */
+export const HEX_TABLE: string[] = new Array(256);
+
+for (let i = 0; i < 256; i++) {
+  HEX_TABLE[i] = i.toString(16).toUpperCase().padStart(2, "0");
+}
+
 /**
  * Convert bytes to uppercase hex string.
  *
@@ -58,7 +65,7 @@ export function bytesToHex(bytes: Uint8Array): string {
   let hex = "";
 
   for (const byte of bytes) {
-    hex += byte.toString(16).toUpperCase().padStart(2, "0");
+    hex += HEX_TABLE[byte];
   }
 
   return hex;
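The table trades 256 tiny strings, built once at module load, for the per-byte `toString(16)`/`padStart` calls in the old hot loop. A quick sketch of the equivalence (import path as declared in this diff):

```typescript
import { bytesToHex, HEX_TABLE } from "#src/helpers/buffer";

// HEX_TABLE[i] === i.toString(16).toUpperCase().padStart(2, "0")
console.log(HEX_TABLE[0x00]); // "00"
console.log(HEX_TABLE[0xff]); // "FF"
console.log(bytesToHex(new Uint8Array([0xde, 0xad, 0xbe, 0xef]))); // "DEADBEEF"
```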
diff --git a/src/helpers/lru-cache.test.ts b/src/helpers/lru-cache.test.ts
index acb3fe8..7385bc4 100644
--- a/src/helpers/lru-cache.test.ts
+++ b/src/helpers/lru-cache.test.ts
@@ -4,7 +4,7 @@ import { LRUCache } from "./lru-cache";
 
 describe("LRUCache", () => {
   it("stores and retrieves values", () => {
-    const cache = new LRUCache<string, number>(10);
+    const cache = new LRUCache<string, number>({ max: 10 });
 
     cache.set("a", 1);
     cache.set("b", 2);
@@ -15,7 +15,7 @@ describe("LRUCache", () => {
   });
 
   it("updates existing values", () => {
-    const cache = new LRUCache<string, number>(10);
+    const cache = new LRUCache<string, number>({ max: 10 });
 
     cache.set("a", 1);
     cache.set("a", 2);
@@ -25,7 +25,7 @@ describe("LRUCache", () => {
   });
 
   it("evicts least recently used when at capacity", () => {
-    const cache = new LRUCache<string, number>(3);
+    const cache = new LRUCache<string, number>({ max: 3 });
 
     cache.set("a", 1);
     cache.set("b", 2);
@@ -45,7 +45,7 @@ describe("LRUCache", () => {
   });
 
   it("get updates recency", () => {
-    const cache = new LRUCache<string, number>(3);
+    const cache = new LRUCache<string, number>({ max: 3 });
 
     cache.set("a", 1);
     cache.set("b", 2);
@@ -64,7 +64,7 @@ describe("LRUCache", () => {
   });
 
   it("has checks existence without updating recency", () => {
-    const cache = new LRUCache<string, number>(3);
+    const cache = new LRUCache<string, number>({ max: 3 });
 
     cache.set("a", 1);
     cache.set("b", 2);
@@ -81,7 +81,7 @@ describe("LRUCache", () => {
   });
 
   it("delete removes entries", () => {
-    const cache = new LRUCache<string, number>(10);
+    const cache = new LRUCache<string, number>({ max: 10 });
 
     cache.set("a", 1);
     cache.set("b", 2);
@@ -93,7 +93,7 @@ describe("LRUCache", () => {
   });
 
   it("clear removes all entries", () => {
-    const cache = new LRUCache<string, number>(10);
+    const cache = new LRUCache<string, number>({ max: 10 });
 
     cache.set("a", 1);
     cache.set("b", 2);
@@ -105,8 +105,8 @@ describe("LRUCache", () => {
     expect(cache.get("a")).toBeUndefined();
   });
 
-  it("works with default size", () => {
-    const cache = new LRUCache<string, number>();
+  it("works with large capacity", () => {
+    const cache = new LRUCache<string, number>({ max: 10000 });
 
     // Should be able to add many items
     for (let i = 0; i < 1000; i++) {
diff --git a/src/helpers/lru-cache.ts b/src/helpers/lru-cache.ts
index cb64792..77ef617 100644
--- a/src/helpers/lru-cache.ts
+++ b/src/helpers/lru-cache.ts
@@ -1,91 +1,8 @@
 /**
- * Simple LRU (Least Recently Used) cache implementation.
+ * LRU cache re-export.
  *
- * Used for interning frequently-used PDF objects (PdfName, PdfRef)
- * while preventing unbounded memory growth.
+ * Re-exports the `lru-cache` npm package, which provides the same interface
+ * used by the PdfName and PdfRef interning caches.
  */
 
-/**
- * A bounded cache that evicts least-recently-used entries when full.
- *
- * @typeParam K - Key type
- * @typeParam V - Value type
- */
-export class LRUCache<K, V> {
-  private readonly maxSize: number;
-  private readonly cache = new Map<K, V>();
-
-  /**
-   * Create a new LRU cache.
-   *
-   * @param maxSize - Maximum number of entries to retain (default: 10000)
-   */
-  constructor(maxSize = 10000) {
-    this.maxSize = maxSize;
-  }
-
-  /**
-   * Get a value from the cache, updating its recency.
-   *
-   * @returns The cached value, or undefined if not present
-   */
-  get(key: K): V | undefined {
-    const value = this.cache.get(key);
-
-    if (value !== undefined) {
-      // Move to end (most recently used)
-      this.cache.delete(key);
-      this.cache.set(key, value);
-    }
-
-    return value;
-  }
-
-  /**
-   * Check if a key exists in the cache (without updating recency).
-   */
-  has(key: K): boolean {
-    return this.cache.has(key);
-  }
-
-  /**
-   * Add or update a value in the cache.
-   *
-   * If the cache is at capacity, the least-recently-used entry is evicted.
-   */
-  set(key: K, value: V): void {
-    // If key exists, delete it first so it becomes the most recent
-    if (this.cache.has(key)) {
-      this.cache.delete(key);
-    } else if (this.cache.size >= this.maxSize) {
-      // Evict the oldest entry (first in Map iteration order)
-      const oldestKey = this.cache.keys().next().value;
-      if (oldestKey !== undefined) {
-        this.cache.delete(oldestKey);
-      }
-    }
-
-    this.cache.set(key, value);
-  }
-
-  /**
-   * Remove a value from the cache.
-   */
-  delete(key: K): boolean {
-    return this.cache.delete(key);
-  }
-
-  /**
-   * Clear all entries from the cache.
-   */
-  clear(): void {
-    this.cache.clear();
-  }
-
-  /**
-   * Get the current number of entries in the cache.
-   */
-  get size(): number {
-    return this.cache.size;
-  }
-}
+export { LRUCache } from "lru-cache";
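The re-export works because the call sites only use `get`, `set`, `has`, `delete`, `clear`, and `size`, all of which the npm package provides with the same semantics; only construction changes from a positional max to an options object. A minimal sketch:

```typescript
import { LRUCache } from "lru-cache";

const cache = new LRUCache<string, number>({ max: 3 });

cache.set("a", 1);
cache.set("b", 2);
cache.set("c", 3);
cache.get("a"); // promotes "a" to most recently used
cache.set("d", 4); // at capacity: evicts "b", the least recently used

console.log(cache.has("b")); // false
console.log(cache.size); // 3
```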
diff --git a/src/objects/pdf-name.ts b/src/objects/pdf-name.ts
index a8238e9..d52561c 100644
--- a/src/objects/pdf-name.ts
+++ b/src/objects/pdf-name.ts
@@ -1,3 +1,4 @@
+import { HEX_TABLE } from "#src/helpers/buffer";
 import { CHAR_HASH, DELIMITERS, WHITESPACE } from "#src/helpers/chars";
 import { LRUCache } from "#src/helpers/lru-cache";
 import type { ByteWriter } from "#src/io/byte-writer";
@@ -9,11 +10,25 @@ import type { PdfPrimitive } from "./pdf-primitive";
 // Plus anything outside printable ASCII (33-126)
 const NAME_NEEDS_ESCAPE = new Set([...WHITESPACE, ...DELIMITERS, CHAR_HASH]);
 
+/** Module-level encoder — avoids constructing one per escapeName call. */
+const textEncoder = new TextEncoder();
+
 /**
- * Default cache size for PdfName interning.
- * Can be overridden via PdfName.setCacheSize().
+ * Check whether a name is pure "safe" ASCII — every char is printable ASCII
+ * (33–126) and not in the escape set. If so, no escaping is needed and we
+ * can skip the TextEncoder entirely.
  */
-const DEFAULT_NAME_CACHE_SIZE = 10000;
+function isSimpleAsciiName(name: string): boolean {
+  for (let i = 0; i < name.length; i++) {
+    const c = name.charCodeAt(i);
+
+    if (c < 33 || c > 126 || NAME_NEEDS_ESCAPE.has(c)) {
+      return false;
+    }
+  }
+
+  return true;
+}
 
 /**
  * Escape a PDF name for serialization.
@@ -24,15 +39,18 @@
  * - The # character itself
  */
 function escapeName(name: string): string {
-  const encoder = new TextEncoder();
-  const bytes = encoder.encode(name);
+  // Fast path: pure safe ASCII — no encoding or escaping needed
+  if (isSimpleAsciiName(name)) {
+    return name;
+  }
+
+  const bytes = textEncoder.encode(name);
 
   let result = "";
 
   for (const byte of bytes) {
     if (byte < 33 || byte > 126 || NAME_NEEDS_ESCAPE.has(byte)) {
-      // Use hex escape
-      result += `#${byte.toString(16).toUpperCase().padStart(2, "0")}`;
+      result += `#${HEX_TABLE[byte]}`;
     } else {
       result += String.fromCharCode(byte);
     }
@@ -41,6 +59,12 @@ function escapeName(name: string): string {
   return result;
 }
 
+/**
+ * Default cache size for PdfName interning.
+ * Can be overridden via PdfName.setCacheSize().
+ */
+const DEFAULT_NAME_CACHE_SIZE = 10000;
+
 /**
  * PDF name object (interned).
  *
@@ -57,7 +81,7 @@ export class PdfName implements PdfPrimitive {
     return "name";
   }
 
-  private static cache = new LRUCache<string, PdfName>(DEFAULT_NAME_CACHE_SIZE);
+  private static cache = new LRUCache<string, PdfName>({ max: DEFAULT_NAME_CACHE_SIZE });
 
   /**
    * Pre-cached common names that should never be evicted.
@@ -80,6 +104,9 @@ export class PdfName implements PdfPrimitive {
   static readonly Filter = PdfName.createPermanent("Filter");
   static readonly FlateDecode = PdfName.createPermanent("FlateDecode");
 
+  /** Cached serialized form (e.g. "/Type"). Computed lazily on first toBytes(). */
+  private cachedBytes: Uint8Array | null = null;
+
   private constructor(readonly value: string) {}
 
   /**
@@ -124,7 +151,17 @@ export class PdfName implements PdfPrimitive {
   }
 
   toBytes(writer: ByteWriter): void {
-    writer.writeAscii(`/${escapeName(this.value)}`);
+    let bytes = this.cachedBytes;
+
+    if (bytes === null) {
+      const escaped = escapeName(this.value);
+
+      bytes = textEncoder.encode(`/${escaped}`);
+
+      this.cachedBytes = bytes;
+    }
+
+    writer.writeBytes(bytes);
   }
 
   /**
diff --git a/src/objects/pdf-ref.ts b/src/objects/pdf-ref.ts
index 3c4e474..77312c7 100644
--- a/src/objects/pdf-ref.ts
+++ b/src/objects/pdf-ref.ts
@@ -23,7 +23,7 @@ export class PdfRef implements PdfPrimitive {
     return "ref";
   }
 
-  private static cache = new LRUCache<string, PdfRef>(DEFAULT_REF_CACHE_SIZE);
+  private static cache = new LRUCache<string, PdfRef>({ max: DEFAULT_REF_CACHE_SIZE });
 
   private constructor(
     readonly objectNumber: number,
diff --git a/src/writer/pdf-writer.ts b/src/writer/pdf-writer.ts
index 27469df..371a073 100644
--- a/src/writer/pdf-writer.ts
+++ b/src/writer/pdf-writer.ts
@@ -54,6 +54,18 @@ export interface WriteOptions {
    */
   compressStreams?: boolean;
 
+  /**
+   * Minimum stream size in bytes to attempt compression (default: 512).
+   *
+   * Streams smaller than this threshold are written uncompressed.
+   * Deflate initialization has a fixed cost (~0.023ms for pako's 64KB
+   * hash table) that dominates for small payloads, and tiny streams
+   * rarely achieve meaningful compression.
+   *
+   * Set to 0 to compress all streams regardless of size.
+   */
+  compressionThreshold?: number;
+
   /**
    * Security handler for encrypting content.
* @@ -106,7 +118,13 @@ function writeIndirectObject(writer: ByteWriter, ref: PdfRef, obj: PdfObject): v * Streams that already have filters are returned unchanged - this includes * image formats (DCTDecode, JPXDecode, etc.) that are already compressed. */ -function prepareObjectForWrite(obj: PdfObject, compress: boolean): PdfObject { +const DEFAULT_COMPRESSION_THRESHOLD = 512; + +function prepareObjectForWrite( + obj: PdfObject, + compress: boolean, + compressionThreshold: number, +): PdfObject { // Only process streams if (!(obj instanceof PdfStream)) { return obj; @@ -122,8 +140,11 @@ function prepareObjectForWrite(obj: PdfObject, compress: boolean): PdfObject { return obj; } - // Empty streams don't need compression - if (obj.data.length === 0) { + // Pako's deflate initialization zeros a 64KB hash table on every call + // (~0.023ms). For streams below the threshold the compression savings + // are negligible relative to the init cost, especially when writing + // many PDFs (e.g. splitting 2000 pages). + if (obj.data.length < compressionThreshold) { return obj; } @@ -322,6 +343,7 @@ function collectReachableRefs( export function writeComplete(registry: ObjectRegistry, options: WriteOptions): WriteResult { const writer = new ByteWriter(); const compress = options.compressStreams ?? true; + const threshold = options.compressionThreshold ?? DEFAULT_COMPRESSION_THRESHOLD; // Version const version = options.version ?? "1.7"; @@ -346,7 +368,7 @@ export function writeComplete(registry: ObjectRegistry, options: WriteOptions): continue; // Skip orphan objects } // Prepare object (compress streams if needed) - let prepared = prepareObjectForWrite(obj, compress); + let prepared = prepareObjectForWrite(obj, compress, threshold); // Apply encryption if security handler is provided // Skip encrypting the /Encrypt dictionary itself @@ -467,6 +489,7 @@ export function writeIncremental( } const compress = options.compressStreams ?? true; + const threshold = options.compressionThreshold ?? DEFAULT_COMPRESSION_THRESHOLD; // Initialize ByteWriter with original bytes const writer = new ByteWriter(options.originalBytes); @@ -483,7 +506,7 @@ export function writeIncremental( // Write modified objects for (const [ref, obj] of changes.modified) { - let prepared = prepareObjectForWrite(obj, compress); + let prepared = prepareObjectForWrite(obj, compress, threshold); // Apply encryption if security handler is provided // Skip encrypting the /Encrypt dictionary itself @@ -505,7 +528,7 @@ export function writeIncremental( // Write new objects for (const [ref, obj] of changes.created) { - let prepared = prepareObjectForWrite(obj, compress); + let prepared = prepareObjectForWrite(obj, compress, threshold); // Apply encryption if security handler is provided // Skip encrypting the /Encrypt dictionary itself
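A sketch of how the new options reach the writer through `PDF.save()` (the `pdf` instance is assumed to be an already-open document; option names come from `SaveOptions` above):

```typescript
import type { PDF } from "#src/api/pdf";

declare const pdf: PDF; // hypothetical loaded document

// Defaults: compressStreams: true, compressionThreshold: 512.
const defaults = await pdf.save();

// Compress every unfiltered stream, even tiny ones:
const smallest = await pdf.save({ compressionThreshold: 0 });

// Skip FlateDecode entirely, e.g. to inspect raw content streams:
const readable = await pdf.save({ compressStreams: false });
```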