From 3ff246410766f450562a455f61fcb414042a92b0 Mon Sep 17 00:00:00 2001
From: avdoseferovic <eo.avdo@gmail.com>
Date: Fri, 16 Jan 2026 16:24:12 +0100
Subject: [PATCH 1/2] Refactor: Support emojis and high unicode characters

Changes:

- Change fontDefType.Cw and utf8FontFile.CharWidths from slices to map[int]int to support sparse and high Unicode code points (fixes a crash).

- Update utf8toutf16 to correctly handle 4-byte UTF-8 sequences using surrogate pairs.

- Add UnmarshalJSON to fontDefType for backward compatibility with array-based font definitions.

- Remove hardcoded limit checks for character widths.
---
 def.go          | 40 ++++++++++++++++++++++++++++++++++++++-
 font.go         |  8 +++++++-
 fpdf.go         | 50 ++++++++++++++++++++++++++++++++-----------------
 splittext.go    |  2 +-
 utf8fontfile.go | 20 +++++++-------------
 util.go         | 33 ++++++++++----------------------
 6 files changed, 97 insertions(+), 56 deletions(-)

diff --git a/def.go b/def.go
index 6a7030f9..9a5f16c1 100644
--- a/def.go
+++ b/def.go
@@ -23,6 +23,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"strconv"
 	"time"
 )
 
@@ -702,7 +703,7 @@ type fontDefType struct {
 	Desc         FontDescType  // Font descriptor
 	Up           int           // Underline position
 	Ut           int           // Underline thickness
-	Cw           []int         // Character width by ordinal
+	Cw           map[int]int   // Character width by ordinal
 	Enc          string        // "cp1252", ...
 	Diff         string        // Differences from reference encoding
 	File         string        // "Redressed.z"
@@ -715,6 +716,43 @@ type fontDefType struct {
 	usedRunes    map[int]int   // Array of used runes
 }
 
+// UnmarshalJSON handles both array (legacy) and map (new) formats for Cw
+func (f *fontDefType) UnmarshalJSON(data []byte) error {
+	type Alias fontDefType
+	aux := &struct {
+		Cw interface{}
+		*Alias
+	}{
+		Alias: (*Alias)(f),
+	}
+	if err := json.Unmarshal(data, &aux); err != nil {
+		return err
+	}
+
+	f.Cw = make(map[int]int)
+	if aux.Cw != nil {
+		switch v := aux.Cw.(type) {
+		case []interface{}:
+			for i, val := range v {
+				if fVal, ok := val.(float64); ok {
+					if fVal != 0 {
+						f.Cw[i] = int(fVal)
+					}
+				}
+			}
+		case map[string]interface{}:
+			for k, val := range v {
+				if fVal, ok := val.(float64); ok {
+					if i, err := strconv.Atoi(k); err == nil {
+						f.Cw[i] = int(fVal)
+					}
+				}
+			}
+		}
+	}
+	return nil
+}
+
 // generateFontID generates a font Id from the font definition
 func generateFontID(fdt fontDefType) (string, error) {
 	// file can be different if generated in different instance
diff --git a/font.go b/font.go
index 29417bb0..a309c764 100644
--- a/font.go
+++ b/font.go
@@ -344,7 +344,13 @@ func makeDefinitionFile(fileStr, tpStr, encodingFileStr string, embed bool, encL
 	// dump(def.Desc.FontBBox)
 	def.Up = info.UnderlinePosition
 	def.Ut = info.UnderlineThickness
-	def.Cw = info.Widths
+	// def.Cw = info.Widths
+	def.Cw = make(map[int]int)
+	for i, w := range info.Widths {
+		if w != 0 {
+			def.Cw[i] = w
+		}
+	}
 	def.Enc = baseNoExt(encodingFileStr)
 	// fmt.Printf("encodingFileStr [%s], def.Enc [%s]\n", encodingFileStr, def.Enc)
 	// fmt.Printf("reference [%s]\n", filepath.Join(filepath.Dir(encodingFileStr), "cp1252.map"))
diff --git a/fpdf.go b/fpdf.go
index 3be1cdc2..7a79f241 100644
--- a/fpdf.go
+++ b/fpdf.go
@@ -963,9 +963,10 @@ func (f *Fpdf) GetStringSymbolWidth(s string) int {
 		unicode := []rune(s)
 		for _, char := range unicode {
 			intChar := int(char)
-			if len(f.currentFont.Cw) >= intChar && f.currentFont.Cw[intChar] > 0 {
-				if f.currentFont.Cw[intChar] != 65535 {
-					w += f.currentFont.Cw[intChar]
+			width, ok := f.currentFont.Cw[intChar]
+			if ok && width > 0 {
+				if width != 65535 {
+					w += width
 				}
 			} else if f.currentFont.Desc.MissingWidth != 0 {
 				w += f.currentFont.Desc.MissingWidth
@@ -974,11 +975,22 @@ func (f *Fpdf) GetStringSymbolWidth(s string) int {
 			}
 		}
 	} else {
-		for _, ch := range []byte(s) {
-			if ch == 0 {
+		for _, char := range []byte(s) {
+			if char == 0 {
 				break
 			}
-			w += f.currentFont.Cw[ch]
+			ch := int(char)
+			if width, ok := f.currentFont.Cw[ch]; ok {
+				w += width
+			} else {
+				// Default behavior for non-existent char in map (should ideally not happen for byte fonts if properly initialized)
+				// Or assume missing width
+				if f.currentFont.Desc.MissingWidth != 0 {
+					w += f.currentFont.Desc.MissingWidth
+				} else {
+					w += 500
+				}
+			}
 		}
 	}
 	return w
@@ -2545,7 +2557,7 @@ func (f *Fpdf) SplitLines(txt []byte, w float64) [][]byte {
 	l := 0
 	for i < nb {
 		c := s[i]
-		l += cw[c]
+		l += cw[int(c)]
 		if c == ' ' || c == '\t' || c == '\n' {
 			sep = i
 		}
@@ -2709,14 +2721,14 @@ func (f *Fpdf) MultiCell(w, h float64, txtStr, borderStr, alignStr string, fill
 			ls = l
 			ns++
 		}
-		if int(c) >= len(cw) {
-			f.err = fmt.Errorf("character outside the supported range: %s", string(c))
-			return
-		}
-		if cw[int(c)] == 0 { //Marker width 0 used for missing symbols
+		// if int(c) >= len(cw) {
+		// 	f.err = fmt.Errorf("character outside the supported range: %s", string(c))
+		// 	return
+		// }
+		if width, ok := cw[int(c)]; !ok || width == 0 { //Marker width 0 used for missing symbols
 			l += f.currentFont.Desc.MissingWidth
-		} else if cw[int(c)] != 65535 { //Marker width 65535 used for zero width symbols
-			l += cw[int(c)]
+		} else if width != 65535 { //Marker width 65535 used for zero width symbols
+			l += width
 		}
 		if l > wmax {
 			// Automatic line break
@@ -4079,7 +4091,11 @@ func (f *Fpdf) putfonts() {
 				var s fmtBuffer
 				s.WriteString("[")
 				for j := 32; j < 256; j++ {
-					s.printf("%d ", font.Cw[j])
+					if width, ok := font.Cw[j]; ok {
+						s.printf("%d ", width)
+					} else {
+						s.WriteString("0 ")
+					}
 				}
 				s.WriteString("]")
 				f.out(s.String())
@@ -4200,10 +4216,10 @@ func (f *Fpdf) generateCIDFontMap(font *fontDefType, LastRune int) {
 
 	// for each character
 	for cid := startCid; cid < cwLen; cid++ {
-		if font.Cw[cid] == 0x00 {
+		width, ok := font.Cw[cid]
+		if !ok || width == 0x00 {
 			continue
 		}
-		width := font.Cw[cid]
 		if width == 65535 {
 			width = 0
 		}
diff --git a/splittext.go b/splittext.go
index 525f93b0..b203e298 100644
--- a/splittext.go
+++ b/splittext.go
@@ -25,7 +25,7 @@ func (f *Fpdf) SplitText(txt string, w float64) (lines []string) {
 	l := 0
 	for i < nb {
 		c := s[i]
-		l += cw[c]
+		l += cw[int(c)]
 		if unicode.IsSpace(c) || isChinese(c) {
 			sep = i
 		}
diff --git a/utf8fontfile.go b/utf8fontfile.go
index 0e1a17a7..f0f96df2 100644
--- a/utf8fontfile.go
+++ b/utf8fontfile.go
@@ -51,7 +51,7 @@ type utf8FontFile struct {
 	Flags                int
 	UnderlinePosition    float64
 	UnderlineThickness   float64
-	CharWidths           []int
+	CharWidths           map[int]int
 	DefaultWidth         float64
 	symbolData           map[int]map[string][]int
 	CodeSymbolDictionary map[int]int
@@ -836,7 +836,7 @@ func (utf *utf8FontFile) parseHMTXTable(numberOfHMetrics, numSymbols int, symbol
 	start := utf.SeekTable("hmtx")
 	arrayWidths := 0
 	var arr []int
-	utf.CharWidths = make([]int, 256*256)
+	utf.CharWidths = make(map[int]int)
 	charCount := 0
 	arr = unpackUint16Array(utf.getRange(start, numberOfHMetrics*4))
 	for symbol := 0; symbol < numberOfHMetrics; symbol++ {
@@ -856,10 +856,8 @@ func (utf *utf8FontFile) parseHMTXTable(numberOfHMetrics, numSymbols int, symbol
 					if widths == 0 {
 						widths = 65535
 					}
-					if char < 196608 {
-						utf.CharWidths[char] = widths
-						charCount++
-					}
+					utf.CharWidths[char] = widths
+					charCount++
 				}
 			}
 		}
@@ -874,10 +872,8 @@ func (utf *utf8FontFile) parseHMTXTable(numberOfHMetrics, numSymbols int, symbol
 					if widths == 0 {
 						widths = 65535
 					}
-					if char < 196608 {
-						utf.CharWidths[char] = widths
-						charCount++
-					}
+					utf.CharWidths[char] = widths
+					charCount++
 				}
 			}
 		}
@@ -967,9 +963,7 @@ func (utf *utf8FontFile) generateSCCSDictionaries(runeCmapPosition int, symbolCh
 				}
 			}
 			charSymbolDictionary[char] = symbol
-			if char < 196608 {
-				maxRune = max(char, maxRune)
-			}
+			maxRune = max(char, maxRune)
 			symbolCharDictionary[symbol] = append(symbolCharDictionary[symbol], char)
 		}
 	}
diff --git a/util.go b/util.go
index 351d3192..ea5e1c4c 100644
--- a/util.go
+++ b/util.go
@@ -115,29 +115,16 @@ func utf8toutf16(s string, withBOM ...bool) string {
 	if bom {
 		res = append(res, 0xFE, 0xFF)
 	}
-	nb := len(s)
-	i := 0
-	for i < nb {
-		c1 := byte(s[i])
-		i++
-		switch {
-		case c1 >= 224:
-			// 3-byte character
-			c2 := byte(s[i])
-			i++
-			c3 := byte(s[i])
-			i++
-			res = append(res, ((c1&0x0F)<<4)+((c2&0x3C)>>2),
-				((c2&0x03)<<6)+(c3&0x3F))
-		case c1 >= 192:
-			// 2-byte character
-			c2 := byte(s[i])
-			i++
-			res = append(res, ((c1 & 0x1C) >> 2),
-				((c1&0x03)<<6)+(c2&0x3F))
-		default:
-			// Single-byte character
-			res = append(res, 0, c1)
+	for _, r := range s {
+		if r < 0x10000 {
+			// BMP character
+			res = append(res, byte(r>>8), byte(r))
+		} else {
+			// Supplementary character (needs surrogate pair)
+			r -= 0x10000
+			high := 0xD800 | (r >> 10)
+			low := 0xDC00 | (r & 0x3FF)
+			res = append(res, byte(high>>8), byte(high), byte(low>>8), byte(low))
 		}
 	}
 	return string(res)

From 5dfb7936551e0b744a1676149f38e7921f583976 Mon Sep 17 00:00:00 2001
From: avdoseferovic <eo.avdo@gmail.com>
Date: Fri, 16 Jan 2026 16:38:48 +0100
Subject: [PATCH 2/2] Refactor: Phase 2 and 3 implementation for Emoji support

Changes:

- Implement CMAP Format 12 parsing in utf8fontfile.go.

- Implement CID remapping in fpdf.go to support characters outside the BMP (e.g. emojis).

- Add runeToCid map to fontDefType.

- Add helper methods stringToCIDs and getOrAssignCID.

- Update Text, CellFormat, and generateCIDFontMap to use CID remapping and correct width lookup.

- Update parseSymbols to use CIDs as keys for GID lookup.
---
 def.go          |   2 +
 fpdf.go         | 109 ++++++++++++++++++++++++++++++++++++++++--------
 utf8fontfile.go |  50 +++++++++++++++++-----
 3 files changed, 133 insertions(+), 28 deletions(-)

diff --git a/def.go b/def.go
index 9a5f16c1..bb2cb16d 100644
--- a/def.go
+++ b/def.go
@@ -714,6 +714,8 @@ type fontDefType struct {
 	i            string        // 1-based position in font list, set by font loader, not this program
 	utf8File     *utf8FontFile // UTF-8 font
 	usedRunes    map[int]int   // Array of used runes
+	runeToCid    map[int]int   // Map of rune to CID (for remapping)
+	nextFreeCID  int           // Next available CID for remapping
 }
 
 // UnmarshalJSON handles both array (legacy) and map (new) formats for Cw
diff --git a/fpdf.go b/fpdf.go
index 7a79f241..d4b5b3ea 100644
--- a/fpdf.go
+++ b/fpdf.go
@@ -1745,6 +1745,10 @@ func (f *Fpdf) addFont(familyStr, styleStr, fileStr string, isUTF8 bool) {
 			usedRunes: sbarr,
 			File:      fileStr,
 			utf8File:  utf8File,
+			runeToCid: make(map[int]int),
+		}
+		for cid, r := range sbarr {
+			def.runeToCid[r] = cid
 		}
 		def.i, _ = generateFontID(def)
 		f.fonts[fontKey] = def
@@ -1881,6 +1885,10 @@ func (f *Fpdf) addFontFromBytes(familyStr, styleStr string, jsonFileBytes, zFile
 			Cw:        utf8File.CharWidths,
 			utf8File:  utf8File,
 			usedRunes: sbarr,
+			runeToCid: make(map[int]int),
+		}
+		for cid, r := range sbarr {
+			def.runeToCid[r] = cid
 		}
 		def.i, _ = generateFontID(def)
 		f.fonts[fontkey] = def
@@ -2213,6 +2221,61 @@ func (f *Fpdf) Bookmark(txtStr string, level int, y float64) {
 	f.outlines = append(f.outlines, outlineType{text: txtStr, level: level, y: y, p: f.PageNo(), prev: -1, last: -1, next: -1, first: -1})
 }
 
+func (f *Fpdf) getOrAssignCID(r int) int {
+	if cid, ok := f.currentFont.runeToCid[r]; ok {
+		return cid
+	}
+
+	cid := r
+	// If the rune is in BMP and not already used as a CID for another rune (identity mapping), use it.
+	// But we must check if 'cid' is already occupied by a different rune?
+	// If runeToCid is empty initially, and usedRunes is empty.
+	// We want to prefer Identity.
+	// Check if this CID slot is free in usedRunes.
+	// Note: usedRunes[cid] = original_rune
+	if r < 0xFFFF {
+		if original, used := f.currentFont.usedRunes[r]; !used || original == r {
+			cid = r
+		} else {
+			cid = f.findNextFreeCID()
+		}
+	} else {
+		cid = f.findNextFreeCID()
+	}
+
+	f.currentFont.runeToCid[r] = cid
+	f.currentFont.usedRunes[cid] = r
+	return cid
+}
+
+func (f *Fpdf) findNextFreeCID() int {
+	// Start searching from PUA
+	start := 0xE000
+	for i := start; i < 0xFFFF; i++ {
+		if _, used := f.currentFont.usedRunes[i]; !used {
+			return i
+		}
+	}
+	// If PUA full, search from beginning?
+	for i := 32; i < 0xE000; i++ {
+		if _, used := f.currentFont.usedRunes[i]; !used {
+			return i
+		}
+	}
+	// Fallback to 0 if full (should panic?)
+	return 0
+}
+
+func (f *Fpdf) stringToCIDs(s string) string {
+	var b bytes.Buffer
+	for _, r := range s {
+		cid := f.getOrAssignCID(int(r))
+		b.WriteByte(byte(cid >> 8))
+		b.WriteByte(byte(cid))
+	}
+	return b.String()
+}
+
 // Text prints a character string. The origin (x, y) is on the left of the
 // first character at the baseline. This method permits a string to be placed
 // precisely on the page, but it is usually easier to use Cell(), MultiCell()
@@ -2224,10 +2287,7 @@ func (f *Fpdf) Text(x, y float64, txtStr string) {
 			txtStr = reverseText(txtStr)
 			x -= f.GetStringWidth(txtStr)
 		}
-		txt2 = f.escape(utf8toutf16(txtStr, false))
-		for _, uni := range []rune(txtStr) {
-			f.currentFont.usedRunes[int(uni)] = int(uni)
-		}
+		txt2 = f.escape(f.stringToCIDs(txtStr))
 	} else {
 		txt2 = f.escape(txtStr)
 	}
@@ -2436,10 +2496,10 @@ func (f *Fpdf) CellFormat(w, h float64, txtStr, borderStr string, ln int,
 				txtStr = reverseText(txtStr)
 			}
 			wmax := int(math.Ceil((w - 2*f.cMargin) * 1000 / f.fontSize))
-			for _, uni := range []rune(txtStr) {
-				f.currentFont.usedRunes[int(uni)] = int(uni)
-			}
-			space := f.escape(utf8toutf16(" ", false))
+			// for _, uni := range []rune(txtStr) {
+			// 	f.currentFont.usedRunes[int(uni)] = int(uni)
+			// }
+			space := f.escape(f.stringToCIDs(" "))
 			strSize := f.GetStringSymbolWidth(txtStr)
 			s.printf("BT 0 Tw %.2f %.2f Td [", (f.x+dx)*k, (f.h-(f.y+.5*h+.3*f.fontSize))*k)
 			t := strings.Split(txtStr, " ")
@@ -2447,7 +2507,7 @@ func (f *Fpdf) CellFormat(w, h float64, txtStr, borderStr string, ln int,
 			numt := len(t)
 			for i := 0; i < numt; i++ {
 				tx := t[i]
-				tx = "(" + f.escape(utf8toutf16(tx, false)) + ")"
+				tx = "(" + f.escape(f.stringToCIDs(tx)) + ")"
 				s.printf("%s ", tx)
 				if (i + 1) < numt {
 					s.printf("%.3f(%s) ", -shift, space)
@@ -2460,10 +2520,10 @@ func (f *Fpdf) CellFormat(w, h float64, txtStr, borderStr string, ln int,
 				if f.isRTL {
 					txtStr = reverseText(txtStr)
 				}
-				txt2 = f.escape(utf8toutf16(txtStr, false))
-				for _, uni := range []rune(txtStr) {
-					f.currentFont.usedRunes[int(uni)] = int(uni)
-				}
+				txt2 = f.escape(f.stringToCIDs(txtStr))
+				// for _, uni := range []rune(txtStr) {
+				// 	f.currentFont.usedRunes[int(uni)] = int(uni)
+				// }
 			} else {
 
 				txt2 = strings.Replace(txtStr, "\\", "\\\\", -1)
@@ -2848,7 +2908,12 @@ func (f *Fpdf) write(h float64, txtStr string, link int, linkStr string) {
 		if c == ' ' {
 			sep = i
 		}
-		l += float64(cw[int(c)])
+		// l += float64(cw[int(c)])
+		if width, ok := cw[int(c)]; ok {
+			l += float64(width)
+		} else {
+			l += float64(f.currentFont.Desc.MissingWidth)
+		}
 		if l > wmax {
 			// Automatic line break
 			if sep == -1 {
@@ -4216,16 +4281,24 @@ func (f *Fpdf) generateCIDFontMap(font *fontDefType, LastRune int) {
 
 	// for each character
 	for cid := startCid; cid < cwLen; cid++ {
-		width, ok := font.Cw[cid]
+		runa, used := font.usedRunes[cid]
+		if cid > 255 && (!used || runa == 0) {
+			continue
+		}
+		if !used {
+			runa = cid
+		}
+
+		width, ok := font.Cw[runa]
 		if !ok || width == 0x00 {
 			continue
 		}
 		if width == 65535 {
 			width = 0
 		}
-		if numb, OK := font.usedRunes[cid]; cid > 255 && (!OK || numb == 0) {
-			continue
-		}
+		// if numb, OK := font.usedRunes[cid]; cid > 255 && (!OK || numb == 0) {
+		// 	continue
+		// }
 
 		if cid == prevCid+1 {
 			if width == prevWidth {
diff --git a/utf8fontfile.go b/utf8fontfile.go
index f0f96df2..c41ab897 100644
--- a/utf8fontfile.go
+++ b/utf8fontfile.go
@@ -456,14 +456,18 @@ func (utf *utf8FontFile) parseCMAPTable(format int) int {
 		coded := utf.readUint16()
 		position := utf.readUint32()
 		oldReaderPosition := utf.fileReader.readerPosition
-		if (system == 3 && coded == 1) || system == 0 { // Microsoft, Unicode
+		// System 3: Windows
+		// Coded 1: Unicode BMP (UCS-2)
+		// Coded 10: Unicode Full (UCS-4)
+		if (system == 3 && (coded == 1 || coded == 10)) || system == 0 {
 			format = utf.getUint16(cmapPosition + position)
-			if format == 4 {
-				if cidCMAPPosition == 0 {
-					cidCMAPPosition = cmapPosition + position
-				}
+			if format == 12 {
+				cidCMAPPosition = cmapPosition + position
 				break
 			}
+			if format == 4 {
+				cidCMAPPosition = cmapPosition + position
+			}
 		}
 		utf.seek(int(oldReaderPosition))
 	}
@@ -504,12 +508,15 @@ func (utf *utf8FontFile) generateCMAP() map[int][]int {
 		coder := utf.readUint16()
 		position := utf.readUint32()
 		oldPosition := utf.fileReader.readerPosition
-		if (system == 3 && coder == 1) || system == 0 {
+		if (system == 3 && (coder == 1 || coder == 10)) || system == 0 {
 			format := utf.getUint16(cmapPosition + position)
-			if format == 4 {
+			if format == 12 {
 				runeCmapPosition = cmapPosition + position
 				break
 			}
+			if format == 4 {
+				runeCmapPosition = cmapPosition + position
+			}
 		}
 		utf.seek(int(oldPosition))
 	}
@@ -531,13 +538,13 @@ func (utf *utf8FontFile) generateCMAP() map[int][]int {
 func (utf *utf8FontFile) parseSymbols(usedRunes map[int]int) (map[int]int, map[int]int, map[int]int, []int) {
 	symbolCollection := map[int]int{0: 0}
 	charSymbolPairCollection := make(map[int]int)
-	for _, char := range usedRunes {
+	for cid, char := range usedRunes {
 		if _, OK := utf.charSymbolDictionary[char]; OK {
 			symbolCollection[utf.charSymbolDictionary[char]] = char
-			charSymbolPairCollection[char] = utf.charSymbolDictionary[char]
+			charSymbolPairCollection[cid] = utf.charSymbolDictionary[char]
 
 		}
-		utf.LastRune = max(utf.LastRune, char)
+		utf.LastRune = max(utf.LastRune, cid)
 	}
 
 	begin := utf.tableDescriptions["glyf"].position
@@ -918,6 +925,29 @@ func (utf *utf8FontFile) parseLOCATable(format, numSymbols int) {
 }
 
 func (utf *utf8FontFile) generateSCCSDictionaries(runeCmapPosition int, symbolCharDictionary map[int][]int, charSymbolDictionary map[int]int) {
+	utf.seek(runeCmapPosition)
+	format := utf.readUint16()
+
+	if format == 12 {
+		utf.skip(2) // reserved
+		_ = utf.readUint32() // length
+		utf.skip(4) // language
+		nGroups := utf.readUint32()
+
+		for i := 0; i < int(nGroups); i++ {
+			startCharCode := int(utf.readUint32())
+			endCharCode := int(utf.readUint32())
+			startGlyphID := int(utf.readUint32())
+
+			for char := startCharCode; char <= endCharCode; char++ {
+				symbol := startGlyphID + (char - startCharCode)
+				charSymbolDictionary[char] = symbol
+				symbolCharDictionary[symbol] = append(symbolCharDictionary[symbol], char)
+			}
+		}
+		return
+	}
+
 	maxRune := 0
 	utf.seek(runeCmapPosition + 2)
 	size := utf.readUint16()