Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added fixtures/text/rtl-placed-ltr-text.pdf
Binary file not shown.
33 changes: 33 additions & 0 deletions src/integration/text/rtl-placed-text.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/**
* Regression test for reversed text extraction from design-tool PDFs.
*
* Some design tools (e.g. Figma, Canva) export PDFs where characters are
* placed RIGHT-TO-LEFT in user space via TJ positioning adjustments, even
* though the text is LTR (English). The font has near-zero glyph widths,
* and all positioning is done via positive TJ adjustments (which move the
* pen left). Characters appear in correct reading order in the content
* stream, but their x-positions decrease.
*
* The line grouper sorts characters by x-position (left to right), which
* reverses the correct reading order for these PDFs.
*/
import { PDF } from "#src/api/pdf";
import { loadFixture } from "#src/test-utils";
import { describe, expect, it } from "vitest";

describe("RTL-placed LTR text (design-tool PDFs)", () => {
  // The phrase as it should read in the extracted output, and the same phrase
  // with its characters in reverse order (which must NOT appear).
  const readablePhrase = "Lorem ipsum dolor sit amet consectetur";
  const mirroredPhrase = "rutetcesnoc tema tis rolod muspi meroL";

  it("extracts text in correct reading order, not reversed", async () => {
    // The fixture has lorem ipsum text placed right-to-left via TJ adjustments.
    const fixtureBytes = await loadFixture("text", "rtl-placed-ltr-text.pdf");
    const doc = await PDF.load(fixtureBytes);

    const firstPage = doc.getPage(0);
    expect(firstPage).not.toBeNull();

    const extraction = firstPage!.extractText();

    // Text should read correctly, not reversed.
    expect(extraction.text).toContain(readablePhrase);
    expect(extraction.text).not.toContain(mirroredPhrase);
  });
});
Loading