From dc35d21662a673a1b2d42c12425231c8efefb3bd Mon Sep 17 00:00:00 2001
From: Lukasz Jagiello <jagiello.lukasz@gmail.com>
Date: Wed, 11 Feb 2026 13:31:46 -0800
Subject: [PATCH] fix(text): preserve stream order for RTL-placed design-tool
 PDFs

Design tools like Figma and Canva emit LTR text with right-to-left TJ
placement. Sorting by x-position reverses the text. Detect this pattern
via sequenceIndex and preserve content stream order instead.

- Add RTL_PLACED_THRESHOLD constant (0.8) with documentation
- Return OrderedLine { chars, rtlPlaced } from orderLineChars
- Fix gap calculation in groupIntoSpans for RTL-placed lines
- Fix createSpaceChar bbox positioning for RTL-placed lines
- Use fractional sequenceIndex (n + 0.5) for synthetic spaces
- Make sequenceIndex optional on ExtractedChar
- Guard against missing sequenceIndex (fall back to x-sort)
- Document that heuristic correctly handles genuine RTL text
- Document mixed bidi limitation (needs full bidi algorithm)
- Add 12 unit tests for RTL-placed detection edge cases
---
 fixtures/text/rtl-placed-ltr-text.pdf        | Bin 0 -> 5879 bytes
 src/integration/text/rtl-placed-text.test.ts |  33 +++
 src/text/line-grouper.test.ts                | 244 +++++++++++++++++++
 src/text/line-grouper.ts                     | 143 ++++++++++-
 src/text/text-extractor.ts                   |   1 +
 src/text/text-search.test.ts                 |   1 +
 src/text/types.ts                            |   2 +
 7 files changed, 414 insertions(+), 10 deletions(-)
 create mode 100644 fixtures/text/rtl-placed-ltr-text.pdf
 create mode 100644 src/integration/text/rtl-placed-text.test.ts

diff --git a/fixtures/text/rtl-placed-ltr-text.pdf b/fixtures/text/rtl-placed-ltr-text.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..ca421574c6c9286b0b9a5d515ae3ac5b714080f4
GIT binary patch
literal 5879
zcmeHL&5qkP5WedvcnQ!Rme!CWB>{^CvPl*#nl?dVBn5&Tw6@l(z_u2aT%>)89{L9D
zqwI_Iqb!*;Ck}e(F##5lH5_v0n~(FOC$FF1T#(F?Cy#$W{`2_f<6jAhRJ6CBlFLgm
zoB#e$i`lcPuh#8K%wAWkx)TZu-iWKKq~0#wLihVr;jnJ@Q?u;E2kn-B#4hcA+Y1U5
zorEj8V|F!`9?qHlTrZpIy8SFZq*!lBiM&)F6CvD|+a4=Bk-0O5S8wXB-R~AK^YYTY
zz^WUp!Y@3Kux_C<;UN0;)$ONxagfgI&;7fm+gEF0f>l>nfyM5e$>NvwcGcetZSu^m
z==xn<ZIW5A&GR<7o+nz&cZoZj#QZKHw?!)OpH54uQ;}z>m~X^owzv}WPbYGc2F{Lh
zdl$)7X=P!K@?6F8qLgWVlv@+a^IT@+<x3{l;rS|M^7KT%jOA8I>+R2(zA-0q9qVVg
zEV840IkR6|NySlanf+QxRUYNsew9h%@wZugK3SQ2`#Jukr48_l<EK(13w=DF>!&2i
z9_1{4ROV9o_%ZvbOhb7d<3~jv?r-DxP@zxsWB(}!KRv$u{G6l>@S_MnGNmj7|Ks?O
zQIb4=89ym2qzdq1_GOvWU&BY|q1-ZjR4I-3kHv=+$s+Ktj{T!7lym&kT&4j(V*XN^
zp2%bSaJ^lw8?S#JyI&Mi``f|LD>9jRaG7>)r46u+ZOfHRz3Yscsw`v%+teX-**NqH
z)U+^elE=0yP}6`KlRT!T0`+oOKgxOPaOMa5GksfzauyopQd6KGL$5&Hui9rQw9m%W
zP??q`;GO)-BkHIOp$BzGIlEeFOkI<H93BPgy3{wxS?W<X+@D>2rEvAUK4XU>Ts@@D
zv-S~%a*l6`(gFTV_Qm#7X@=A~$zy&{iL2-N!SErlI}PH;^uZ68I;Q>O(n;XAOC9I@
zBa71D@nM&XELtY7&+sLK(%|u9rHeA~H;~8tBW+~r{b!|(6iP$jKl6`DnFance8@=S
z?dSL+zSjHC_(6zo;q7Pml&HL4F@MZWBl_%ikGbhw@W$LUX|uKm+;N!?`OCFcEH(;d
z&%*af&hD-|K9I+;Q8~B`Y$tu5d%*2LKgNE@J%OC*8<Z7bKkd)-(H0Tz)+lG(QW|9&
zWoOc7{()OAcTf6ke~q>i!5ig_8%pUR_f7io-B#e9YfC3N+aKI>x$*4&q(XhmavRzz
zmpey&R(poF3EZ6IF+T`xvCB=9oZTI;)3u$WoYg);+njp)8TSeNaBbbF&+sE_hPG-t
zKgMs!E#7{9_e?&tU1#^tfPWDG7=Ox8+kJd_+oKCvdVVnc3H-qHGWo~#QF~l&Jc}P{
zi_3jy{U6(^v;HCXgy+X?B}$9-^kejJorky1#+BJM9>L{{FVC2JCfg5v&x^U6gva@B
zoQd58ISq=3A$l4$XK(kn{V-PFw7bnPvtECBx(!F!FHf=KAUhjJQH6WqCsB-^!_=9%
zv2z+^mdST7+g-g8%|o}}h-JHOccN>0QElp8EZS{{ak%dHyYJmF3eVUsF7INOYS}zA
z-J;p9M7@T7S1+-fsGEJaX_um}HxKRkO&oyOq}eW-WwUhO1@^sISGO=tz>dMJT5PJ-
zwwerK5W7+S6yjmf_xL*8NzAURu0BY9sMo*MeY2<r)L(2D?b6vld*5uIZo6iBF-UKk
zUDrRmuXZ?P_OcqEl`(_#eY5QE@ohtaEZ_eAI|i2Mf5*$98V`RU)%5IQ;+}`tDCN(@
zbD{Nd-Lyu#chEo4`xaY|+czw}gvH@2q)RT(>u#}Y9(rWc92q10JnDY)JQXMmA1Hr8
z0v5AlhC;qySF28Fb;w3I;!q_n2y^>|QbyqWK`Kg32P1yOC)#H5bh}#DE@M6I7VZ;j
zjwx^Ej&eUPP#=cfo>dP&)XnOCl)pu?ekag#`cJx_clBM8iXy=o<3C}IL1DcU$9GhU
zK_<5T9j1o(9l_T4okj-|z0;*)x*3}Fc%8}cW#FjW@2X~9?}m@nZ<{}A0e|1Lt^3YA
VMxpDgT|f9^;6d`_$%|Jv$-j!v@VWp1

literal 0
HcmV?d00001

diff --git a/src/integration/text/rtl-placed-text.test.ts b/src/integration/text/rtl-placed-text.test.ts
new file mode 100644
index 0000000..62d138d
--- /dev/null
+++ b/src/integration/text/rtl-placed-text.test.ts
@@ -0,0 +1,33 @@
+/**
+ * Regression test for reversed text extraction from design-tool PDFs.
+ *
+ * Some design tools (e.g. Figma, Canva) export PDFs where characters are
+ * placed RIGHT-TO-LEFT in user space via TJ positioning adjustments, even
+ * though the text is LTR (English). The font has near-zero glyph widths,
+ * and all positioning is done via positive TJ adjustments (which move the
+ * pen left). Characters appear in correct reading order in the content
+ * stream, but their x-positions decrease.
+ *
+ * A line grouper that always sorts characters by x-position (left to right)
+ * reverses the reading order for these PDFs; this test guards against that.
+ */
+import { PDF } from "#src/api/pdf";
+import { loadFixture } from "#src/test-utils";
+import { describe, expect, it } from "vitest";
+
+describe("RTL-placed LTR text (design-tool PDFs)", () => {
+  it("extracts text in correct reading order, not reversed", async () => {
+    const bytes = await loadFixture("text", "rtl-placed-ltr-text.pdf");
+    const pdf = await PDF.load(bytes);
+    const page = pdf.getPage(0);
+
+    expect(page).not.toBeNull();
+
+    const pageText = page!.extractText();
+
+    // The fixture has lorem ipsum text placed right-to-left via TJ adjustments.
+    // Text should read correctly, not reversed.
+    expect(pageText.text).toContain("Lorem ipsum dolor sit amet consectetur");
+    expect(pageText.text).not.toContain("rutetcesnoc tema tis rolod muspi meroL");
+  });
+});
diff --git a/src/text/line-grouper.test.ts b/src/text/line-grouper.test.ts
index f1ee258..ba76456 100644
--- a/src/text/line-grouper.test.ts
+++ b/src/text/line-grouper.test.ts
@@ -19,6 +19,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 0,
         },
         {
           char: "e",
@@ -26,6 +27,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 1,
         },
         {
           char: "l",
@@ -33,6 +35,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 2,
         },
         {
           char: "l",
@@ -40,6 +43,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 3,
         },
         {
           char: "o",
@@ -47,6 +51,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 4,
         },
       ];
 
@@ -66,6 +71,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 100,
+          sequenceIndex: 0,
         },
         {
           char: "B",
@@ -73,6 +79,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 100,
+          sequenceIndex: 1,
         },
         // Line 2 at baseline 80
         {
@@ -81,6 +88,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 80,
+          sequenceIndex: 2,
         },
         {
           char: "D",
@@ -88,6 +96,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 80,
+          sequenceIndex: 3,
         },
       ];
 
@@ -109,6 +118,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 0,
         },
         {
           char: "i",
@@ -116,6 +126,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 1,
         },
         // Gap that should trigger space insertion
         {
@@ -124,6 +135,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 2,
         },
         {
           char: "h",
@@ -131,6 +143,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 3,
         },
         {
           char: "e",
@@ -138,6 +151,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 4,
         },
         {
           char: "r",
@@ -145,6 +159,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 5,
         },
         {
           char: "e",
@@ -152,6 +167,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 6,
         },
       ];
 
@@ -169,6 +185,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 0,
         },
         {
           char: "o",
@@ -176,6 +193,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 1,
         },
         {
           char: "r",
@@ -183,6 +201,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 2,
         },
         {
           char: "m",
@@ -190,6 +209,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 3,
         },
         {
           char: "a",
@@ -197,6 +217,7 @@ describe("LineGrouper", () => {
           fontSize: 14,
           fontName: "Helvetica-Bold",
           baseline: 10,
+          sequenceIndex: 4,
         },
         {
           char: "l",
@@ -204,6 +225,7 @@ describe("LineGrouper", () => {
           fontSize: 14,
           fontName: "Helvetica-Bold",
           baseline: 10,
+          sequenceIndex: 5,
         },
       ];
 
@@ -224,6 +246,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 0,
         },
         {
           char: "B",
@@ -231,6 +254,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10.5,
+          sequenceIndex: 1,
         },
         {
           char: "C",
@@ -238,6 +262,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 11,
+          sequenceIndex: 2,
         },
       ];
 
@@ -255,6 +280,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 0,
         },
         // Small gap - should NOT be a space with high threshold
         {
@@ -263,6 +289,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 10,
+          sequenceIndex: 1,
         },
       ];
 
@@ -278,6 +305,219 @@ describe("LineGrouper", () => {
     });
   });
 
+  describe("RTL-placed text detection", () => {
+    /** Helper to build an ExtractedChar with sensible defaults. */
+    function makeChar(char: string, x: number, sequenceIndex?: number, width = 8): ExtractedChar {
+      return {
+        char,
+        bbox: { x, y: 0, width, height: 12 },
+        fontSize: 12,
+        fontName: "Helvetica",
+        baseline: 10,
+        sequenceIndex,
+      };
+    }
+
+    it("preserves stream order for 100% RTL-placed chars", () => {
+      // Chars placed right-to-left (x decreasing) but stream order is A, B, C, D.
+      // Adjacent chars touch (x + width = next x) so no spaces inserted.
+      const chars = [
+        makeChar("A", 30, 0),
+        makeChar("B", 22, 1),
+        makeChar("C", 14, 2),
+        makeChar("D", 6, 3),
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      expect(lines[0].text).toBe("ABCD");
+    });
+
+    it("detects RTL-placed at exactly 80% threshold", () => {
+      // 6 chars → 5 pairs. 4 decreasing = 80% → should be detected.
+      // Chars are width 8: gaps are 0, or negative at the forward jump, so no spaces are inserted.
+      const chars = [
+        makeChar("A", 50, 0),
+        makeChar("B", 42, 1), // decreasing
+        makeChar("C", 34, 2), // decreasing
+        makeChar("D", 26, 3), // decreasing
+        makeChar("E", 28, 4), // increasing (forward jump)
+        makeChar("F", 20, 5), // decreasing
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      expect(lines[0].text).toBe("ABCDEF");
+    });
+
+    it("falls back to x-sort below 80% threshold", () => {
+      // 6 chars → 5 pairs. 3 decreasing = 60% → NOT detected → x-sort.
+      const chars = [
+        makeChar("A", 50, 0),
+        makeChar("B", 42, 1), // decreasing
+        makeChar("C", 44, 2), // increasing
+        makeChar("D", 36, 3), // decreasing
+        makeChar("E", 38, 4), // increasing
+        makeChar("F", 30, 5), // decreasing
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      // x-sorted order: F(30), D(36), E(38), B(42), C(44), A(50)
+      expect(lines[0].text).toBe("FDEBCA");
+    });
+
+    it("uses x-sort for normal LTR text", () => {
+      const chars = [
+        makeChar("A", 0, 0),
+        makeChar("B", 10, 1),
+        makeChar("C", 20, 2),
+        makeChar("D", 30, 3),
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      expect(lines[0].text).toBe("ABCD");
+    });
+
+    it("handles single character", () => {
+      const chars = [makeChar("X", 10, 0)];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      expect(lines[0].text).toBe("X");
+    });
+
+    it("detects two chars with decreasing x as RTL-placed", () => {
+      // 2 chars → 1 pair, 1/1 = 100% decreasing
+      const chars = [makeChar("A", 20, 0), makeChar("B", 10, 1)];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      expect(lines[0].text).toBe("AB");
+    });
+
+    it("preserves stream order for genuine RTL text with normal glyph widths", () => {
+      // Real RTL text (Arabic/Hebrew) has normal glyph widths and decreasing x.
+      // The heuristic correctly detects this and preserves stream order, which
+      // IS the correct reading order for RTL text.
+      const chars = [
+        makeChar("\u0628", 30, 0), // ba
+        makeChar("\u0627", 22, 1), // alef
+        makeChar("\u062F", 14, 2), // dal
+        makeChar("\u0631", 6, 3), // ra
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      // Stream order preserved: ba, alef, dal, ra (correct reading order)
+      expect(lines[0].text).toBe("\u0628\u0627\u062F\u0631");
+    });
+
+    it("inserts space correctly in RTL-placed lines", () => {
+      // Two words placed right-to-left with a gap between them.
+      // Within-word: chars adjacent (prev.x - (char.x + char.width) ≈ 0).
+      // Between-word: gap = 42 - (24 + 8) = 10 > 3.6 threshold → space.
+      const chars = [
+        makeChar("H", 50, 0),
+        makeChar("i", 42, 1),
+        makeChar("t", 24, 2),
+        makeChar("h", 16, 3),
+        makeChar("e", 8, 4),
+        makeChar("r", 0, 5),
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      expect(lines[0].text).toBe("Hi ther");
+    });
+
+    it("inserts multiple spaces in RTL-placed lines with three words", () => {
+      // Three words "AB CD EF" placed right-to-left.
+      // Within-word gap = prev.x - (char.x + 8) = 0 → no space.
+      // Between-word gap = 8 > 3.6 → space.
+      const chars = [
+        makeChar("A", 52, 0),
+        makeChar("B", 44, 1), // gap = 52 - 52 = 0 → no space
+        makeChar("C", 28, 2), // gap = 44 - 36 = 8 → space
+        makeChar("D", 20, 3), // gap = 28 - 28 = 0 → no space
+        makeChar("E", 4, 4), // gap = 20 - 12 = 8 → space
+        makeChar("F", -4, 5), // gap = 4 - 4 = 0 → no space
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      expect(lines[0].text).toBe("AB CD EF");
+    });
+
+    it("handles overlapping RTL-placed characters without crashing", () => {
+      // Tightly kerned chars where bboxes overlap slightly.
+      // gap = prevChar.x - (char.x + char.width) → negative → no space
+      const chars = [
+        makeChar("A", 20, 0),
+        makeChar("B", 13, 1), // gap = 20 - 21 = -1 → no space (overlap)
+        makeChar("C", 6, 2), // gap = 13 - 14 = -1 → no space (overlap)
+        makeChar("D", -1, 3), // gap = 6 - 7 = -1 → no space (overlap)
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      expect(lines[0].text).toBe("ABCD");
+    });
+
+    it("handles mixed RTL-placed and LTR lines on the same page", () => {
+      // Line 1 (baseline 100): RTL-placed text (decreasing x in stream order)
+      // Line 2 (baseline 80): normal LTR text (increasing x)
+      // Each line's RTL detection is independent.
+      const chars: ExtractedChar[] = [
+        // RTL-placed line — adjacent chars (no spaces)
+        { ...makeChar("R", 24, 0), baseline: 100, bbox: { x: 24, y: 90, width: 8, height: 12 } },
+        { ...makeChar("T", 16, 1), baseline: 100, bbox: { x: 16, y: 90, width: 8, height: 12 } },
+        { ...makeChar("L", 8, 2), baseline: 100, bbox: { x: 8, y: 90, width: 8, height: 12 } },
+        // Normal LTR line — adjacent chars (no spaces)
+        { ...makeChar("L", 0, 3), baseline: 80, bbox: { x: 0, y: 70, width: 8, height: 12 } },
+        { ...makeChar("T", 8, 4), baseline: 80, bbox: { x: 8, y: 70, width: 8, height: 12 } },
+        { ...makeChar("R", 16, 5), baseline: 80, bbox: { x: 16, y: 70, width: 8, height: 12 } },
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(2);
+      // Line 1 (higher baseline): RTL-placed → stream order preserved
+      expect(lines[0].text).toBe("RTL");
+      expect(lines[0].baseline).toBe(100);
+      // Line 2 (lower baseline): normal LTR → x-sort
+      expect(lines[1].text).toBe("LTR");
+      expect(lines[1].baseline).toBe(80);
+    });
+
+    it("falls back to x-sort when sequenceIndex is missing", () => {
+      // Chars placed right-to-left but without sequenceIndex — should x-sort
+      const chars = [
+        makeChar("A", 30, undefined),
+        makeChar("B", 20, undefined),
+        makeChar("C", 10, undefined),
+        makeChar("D", 0, undefined),
+      ];
+
+      const lines = groupCharsIntoLines(chars);
+
+      expect(lines).toHaveLength(1);
+      // x-sort produces D(0), C(10), B(20), A(30)
+      expect(lines[0].text).toBe("DCBA");
+    });
+  });
+
   describe("getPlainText", () => {
     it("joins lines with newlines", () => {
       const chars: ExtractedChar[] = [
@@ -287,6 +527,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 100,
+          sequenceIndex: 0,
         },
         {
           char: "1",
@@ -294,6 +535,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 100,
+          sequenceIndex: 1,
         },
         {
           char: "L",
@@ -301,6 +543,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 80,
+          sequenceIndex: 2,
         },
         {
           char: "2",
@@ -308,6 +551,7 @@ describe("LineGrouper", () => {
           fontSize: 12,
           fontName: "Helvetica",
           baseline: 80,
+          sequenceIndex: 3,
         },
       ];
 
diff --git a/src/text/line-grouper.ts b/src/text/line-grouper.ts
index a4b2aca..a8c27e7 100644
--- a/src/text/line-grouper.ts
+++ b/src/text/line-grouper.ts
@@ -9,6 +9,25 @@
 import type { ExtractedChar, TextLine, TextSpan } from "./types";
 import { mergeBboxes } from "./types";
 
+/**
+ * Minimum fraction of consecutive char pairs with decreasing x-positions
+ * (in stream order) to classify a line as "RTL-placed".
+ *
+ * Figma/Canva exports produce ~100% decreasing pairs within words.
+ * 80% tolerates small forward jumps at word boundaries.
+ */
+const RTL_PLACED_THRESHOLD = 0.8;
+
+/**
+ * Result of ordering characters within a line.
+ */
+interface OrderedLine {
+  /** Characters in reading order */
+  chars: ExtractedChar[];
+  /** Whether the line was detected as RTL-placed (design-tool pattern) */
+  rtlPlaced: boolean;
+}
+
 /**
  * Options for line grouping.
  */
@@ -53,11 +72,15 @@ export function groupCharsIntoLines(
   const lines: TextLine[] = [];
 
   for (const group of lineGroups) {
-    // Sort characters left-to-right within the line
-    const sorted = [...group].sort((a, b) => a.bbox.x - b.bbox.x);
+    // Order characters within the line.
+    // Normally we sort left-to-right by x-position, but some design tools
+    // (Figma, Canva) place characters right-to-left via TJ adjustments while
+    // the text is actually LTR. In that case, content stream order is correct
+    // and position-based sorting would reverse the text.
+    const { chars: sorted, rtlPlaced } = orderLineChars(group);
 
     // Group into spans and detect spaces
-    const spans = groupIntoSpans(sorted, spaceThreshold);
+    const spans = groupIntoSpans(sorted, spaceThreshold, rtlPlaced);
 
     if (spans.length === 0) {
       continue;
@@ -82,6 +105,93 @@ export function groupCharsIntoLines(
   return lines;
 }
 
+/**
+ * Determine the correct character order for a line.
+ *
+ * Design tools like Figma and Canva export PDFs where LTR characters are placed
+ * right-to-left via TJ positioning adjustments (positive values move the pen left).
+ * The font has near-zero glyph widths, so all positioning comes from TJ. Characters
+ * appear in correct reading order in the content stream, but their x-positions
+ * decrease almost monotonically (small forward jumps can occur at word boundaries).
+ *
+ * When this pattern is detected, we preserve content stream order instead of sorting
+ * by x-position, which would reverse the text.
+ *
+ * **Limitation**: Detection requires `sequenceIndex` on every character. If any
+ * character in the group lacks a `sequenceIndex`, we fall back to x-position sorting
+ * because stream order cannot be reliably reconstructed.
+ */
+function orderLineChars(group: ExtractedChar[]): OrderedLine {
+  if (group.length <= 1) {
+    return { chars: [...group], rtlPlaced: false };
+  }
+
+  // If any character lacks sequenceIndex, fall back to x-sort
+  const hasStreamOrder = group.every(c => c.sequenceIndex != null);
+
+  if (!hasStreamOrder) {
+    return {
+      chars: [...group].sort((a, b) => a.bbox.x - b.bbox.x),
+      rtlPlaced: false,
+    };
+  }
+
+  // Sort by sequenceIndex to get content stream order.
+  // Safe to use `!` — hasStreamOrder guarantees every char has sequenceIndex.
+  const streamOrder = [...group].sort((a, b) => a.sequenceIndex! - b.sequenceIndex!);
+
+  if (isRtlPlaced(streamOrder)) {
+    return { chars: streamOrder, rtlPlaced: true };
+  }
+
+  // Normal case: sort left-to-right by x-position
+  return {
+    chars: [...group].sort((a, b) => a.bbox.x - b.bbox.x),
+    rtlPlaced: false,
+  };
+}
+
+/**
+ * Detect whether characters are placed right-to-left in user space while
+ * content stream order represents the correct reading order.
+ *
+ * Returns true when x-positions in stream order are predominantly decreasing
+ * (≥ 80% of consecutive pairs). In that case, position-based sorting would
+ * reverse the reading order, so we preserve stream order instead.
+ *
+ * This covers two real-world scenarios:
+ * - **Design-tool PDFs** (Figma, Canva): LTR text placed right-to-left via
+ *   TJ positioning adjustments. Stream order = correct reading order.
+ * - **Genuine RTL text** (Arabic, Hebrew): characters naturally placed
+ *   right-to-left. PDF producers typically emit them in reading order, so
+ *   stream order is again correct.
+ *
+ * In both cases, when x-positions decrease in stream order, preserving stream
+ * order produces the correct reading order.
+ *
+ * **Known limitation**: mixed bidi text (e.g., Arabic with embedded English)
+ * requires a full Unicode bidi algorithm, which is out of scope for this
+ * heuristic. For mixed lines, neither stream order nor x-sort is fully
+ * correct; a future bidi implementation should replace this heuristic.
+ */
+function isRtlPlaced(streamOrder: ExtractedChar[]): boolean {
+  if (streamOrder.length < 2) {
+    return false;
+  }
+
+  // Count how many consecutive character pairs have decreasing x
+  let decreasingCount = 0;
+  for (let i = 1; i < streamOrder.length; i++) {
+    if (streamOrder[i].bbox.x < streamOrder[i - 1].bbox.x) {
+      decreasingCount++;
+    }
+  }
+
+  const totalPairs = streamOrder.length - 1;
+
+  return decreasingCount / totalPairs >= RTL_PLACED_THRESHOLD;
+}
+
 /**
  * Group characters by baseline Y coordinate.
  */
@@ -113,7 +223,11 @@ function groupByBaseline(chars: ExtractedChar[], tolerance: number): ExtractedCh
 /**
  * Group characters into spans based on font/size and detect spaces.
  */
-function groupIntoSpans(chars: ExtractedChar[], spaceThreshold: number): TextSpan[] {
+function groupIntoSpans(
+  chars: ExtractedChar[],
+  spaceThreshold: number,
+  rtlPlaced: boolean,
+): TextSpan[] {
   if (chars.length === 0) {
     return [];
   }
@@ -131,8 +245,12 @@ function groupIntoSpans(chars: ExtractedChar[], spaceThreshold: number): TextSpa
     const fontChanged =
       char.fontName !== currentFontName || Math.abs(char.fontSize - currentFontSize) > 0.5;
 
-    // Check for space gap
-    const gap = char.bbox.x - (prevChar.bbox.x + prevChar.bbox.width);
+    // Check for space gap — in RTL-placed lines, the "next" character in
+    // reading order sits to the left of the previous one, so the gap is
+    // measured from the left edge of prevChar to the right edge of char.
+    const gap = rtlPlaced
+      ? prevChar.bbox.x - (char.bbox.x + char.bbox.width)
+      : char.bbox.x - (prevChar.bbox.x + prevChar.bbox.width);
     const avgFontSize = (prevChar.fontSize + char.fontSize) / 2;
     const needsSpace = gap > avgFontSize * spaceThreshold;
 
@@ -147,7 +265,7 @@ function groupIntoSpans(chars: ExtractedChar[], spaceThreshold: number): TextSpa
     } else if (needsSpace) {
       // Add space to current span and continue
       // We insert a synthetic space character
-      currentSpan.push(createSpaceChar(prevChar, char));
+      currentSpan.push(createSpaceChar(prevChar, char, rtlPlaced));
       currentSpan.push(char);
     } else {
       currentSpan.push(char);
@@ -184,9 +302,13 @@ function buildSpan(chars: ExtractedChar[]): TextSpan {
 /**
  * Create a synthetic space character between two characters.
  */
-function createSpaceChar(before: ExtractedChar, after: ExtractedChar): ExtractedChar {
-  const x = before.bbox.x + before.bbox.width;
-  const width = after.bbox.x - x;
+function createSpaceChar(
+  before: ExtractedChar,
+  after: ExtractedChar,
+  rtlPlaced: boolean,
+): ExtractedChar {
+  const x = rtlPlaced ? after.bbox.x + after.bbox.width : before.bbox.x + before.bbox.width;
+  const width = rtlPlaced ? before.bbox.x - x : after.bbox.x - x;
 
   return {
     char: " ",
@@ -199,6 +321,7 @@ function createSpaceChar(before: ExtractedChar, after: ExtractedChar): Extracted
     fontSize: (before.fontSize + after.fontSize) / 2,
     fontName: before.fontName,
     baseline: (before.baseline + after.baseline) / 2,
+    sequenceIndex: before.sequenceIndex != null ? before.sequenceIndex + 0.5 : undefined,
   };
 }
 
diff --git a/src/text/text-extractor.ts b/src/text/text-extractor.ts
index 1b134f9..4a9c41f 100644
--- a/src/text/text-extractor.ts
+++ b/src/text/text-extractor.ts
@@ -278,6 +278,7 @@ export class TextExtractor {
         fontSize: this.state.effectiveFontSize,
         fontName: font.baseFontName,
         baseline: bbox.baseline,
+        sequenceIndex: this.chars.length,
       });
 
       // Advance text position
diff --git a/src/text/text-search.test.ts b/src/text/text-search.test.ts
index 9d4b5b6..122dfab 100644
--- a/src/text/text-search.test.ts
+++ b/src/text/text-search.test.ts
@@ -20,6 +20,7 @@ function createPageText(text: string, pageIndex = 0): PageText {
       fontSize: 12,
       fontName: "Helvetica",
       baseline: 10,
+      sequenceIndex: chars.length,
     });
     x += 10;
   }
diff --git a/src/text/types.ts b/src/text/types.ts
index 2b7052d..a505f38 100644
--- a/src/text/types.ts
+++ b/src/text/types.ts
@@ -33,6 +33,8 @@ export interface ExtractedChar {
   fontName: string;
   /** Y coordinate of the text baseline */
   baseline: number;
+  /** Content-stream extraction order (0-based; synthetic spaces use n + 0.5). Absent if unknown. */
+  sequenceIndex?: number;
 }
 
 /**