From 2051c3599291c837127714c7c4b507b8e6f594e0 Mon Sep 17 00:00:00 2001
From: David Allsopp
Date: Sat, 21 Oct 2017 09:44:33 +0100
Subject: [PATCH 1/5] Fix encoding of high surrogate in UTF-16

For code points above U+FFFF, toutf16 computed the first code unit as
0xD7C0 + (n lsl 10), shifting left where a right shift is needed.
Subtract 0x10000 from the code point, then emit 0xD800 + (n lsr 10)
followed by 0xDC00 + (n land 0x3FF).
---
NOTE(review): line breaks and leading whitespace in this series were
restored from a whitespace-collapsed copy. Indentation of context lines
and the position of blank context lines are a best guess; apply with
`git am --ignore-whitespace` or regenerate the series from the commits.

 CHANGES  | 2 +-
 reloc.ml | 8 +++++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGES b/CHANGES
index 591ba7e..3ff1481 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,7 +4,7 @@ Next version
   causing https://caml.inria.fr/mantis/view.php?id=7603
 - Support for passing argument through external files (-arg/-arg0)
   (Bernhard Schommer)
-
+- Fix encoding of high surrogate for U+10000-U+10FFFF in UTF-16 response files (David Allsopp)
 
 Version 0.36
 - Add Unicode support (patch by Nicolás Ojeda Bär)
diff --git a/reloc.ml b/reloc.ml
index 8a04e41..f2a6f4d 100644
--- a/reloc.ml
+++ b/reloc.ml
@@ -151,7 +151,13 @@ let toutf16 s =
   let cp n = Buffer.add_char b (Char.chr (n land 0xFF)); Buffer.add_char b (Char.chr ((n lsr 8) land 0xFF)) in
   while !i < String.length s do
     let n = utf8_next s i in
-    if n <= 0xFFFF then cp n else (cp (0xD7C0 + (n lsl 10)); cp (0xDC00 + (n land 0x3FF)))
+    if n <= 0xFFFF then
+      cp n
+    else
+      (* Surrogates *)
+      let n = n - 0x10000 in
+      cp (0xD800 + (n lsr 10));
+      cp (0xDC00 + (n land 0x3FF))
   done;
   Buffer.contents b
 

From 3c82da172e1fe1bc68151f43a2bc051356edfb19 Mon Sep 17 00:00:00 2001
From: David Allsopp
Date: Sun, 22 Oct 2017 13:47:13 +0100
Subject: [PATCH 2/5] Create UTF-16 using Buffer.add_utf_16le_uchar

toutf16 now passes each code point returned by utf8_next to
Buffer.add_utf_16le_uchar instead of writing the two bytes of every
code unit itself. The buffer's initial size becomes
String.length s * 2.
---
 reloc.ml | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/reloc.ml b/reloc.ml
index f2a6f4d..1208cc8 100644
--- a/reloc.ml
+++ b/reloc.ml
@@ -147,17 +147,9 @@ let utf8_next s i =
 
 let toutf16 s =
   let i = ref 0 in
-  let b = Buffer.create (String.length s) in
-  let cp n = Buffer.add_char b (Char.chr (n land 0xFF)); Buffer.add_char b (Char.chr ((n lsr 8) land 0xFF)) in
+  let b = Buffer.create (String.length s * 2) in
   while !i < String.length s do
-    let n = utf8_next s i in
-    if n <= 0xFFFF then
-      cp n
-    else
-      (* Surrogates *)
-      let n = n - 0x10000 in
-      cp (0xD800 + (n lsr 10));
-      cp (0xDC00 + (n land 0x3FF))
+    Buffer.add_utf_16le_uchar b (Uchar.unsafe_of_int (utf8_next s i))
   done;
   Buffer.contents b
 

From 94df4055bd2f17d4495efe893f770170d2dfdaf2 Mon Sep 17 00:00:00 2001
From: David Allsopp
Date: Sun, 22 Oct 2017 14:34:52 +0100
Subject: [PATCH 3/5] Fix 4.03 bootstrap problem

When OCAMLBRANCH is 4.03, edit config/Makefile.msvc64 before
configure_ocaml runs, truncating every line that contains `:=` right
after the `:=`. OCAMLBRANCH is quoted in the test so that an empty or
unset value compares as an empty string instead of making `[` fail
with a syntax error.
---
 appveyor_build.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/appveyor_build.sh b/appveyor_build.sh
index e134ce1..9239add 100755
--- a/appveyor_build.sh
+++ b/appveyor_build.sh
@@ -61,6 +61,10 @@ case $OCAMLBRANCH in
     ;;
 esac
 
+if [ "$OCAMLBRANCH" = "4.03" ] ; then
+  sed -i -e "s/:=.*/:=/" config/Makefile.msvc64
+fi
+
 configure_ocaml
 
 if [ ! -f $OCAMLROOT/STAMP ] || [ "$(git rev-parse HEAD)" != "$(cat $OCAMLROOT/STAMP)" ]; then

From 5484f438064c0fe6ed528bd5f388f0f5ce801520 Mon Sep 17 00:00:00 2001
From: David Allsopp
Date: Sun, 22 Oct 2017 14:48:18 +0100
Subject: [PATCH 4/5] Add Buffer.add_utf_16le_uchar to compatibility layer

Compat406.ml extends Buffer with add_utf_16le_uchar, copied from OCaml
4.06.0. It is added to the prerequisites of Compat.ml when
OCAML_VERSION is lower than 4060.
---
 Compat406.ml | 27 +++++++++++++++++++++++++++
 Makefile     |  2 +-
 2 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 Compat406.ml

diff --git a/Compat406.ml b/Compat406.ml
new file mode 100644
index 0000000..ed90628
--- /dev/null
+++ b/Compat406.ml
@@ -0,0 +1,27 @@
+(************************************************************************)
+(*   FlexDLL                                                            *)
+(*   Alain Frisch                                                       *)
+(*                                                                      *)
+(*   Copyright 2007 Institut National de Recherche en Informatique et   *)
+(*   en Automatique.                                                    *)
+(************************************************************************)
+
+module Buffer = struct
+  include Buffer
+
+  (* Taken from 4.06.0 *)
+  let add_utf_16le_uchar b u = match Uchar.to_int u with
+  | u when u < 0 -> assert false
+  | u when u <= 0xFFFF ->
+      Buffer.add_char b (Char.unsafe_chr (u land 0xFF));
+      Buffer.add_char b (Char.unsafe_chr (u lsr 8))
+  | u when u <= 0x10FFFF ->
+      let u' = u - 0x10000 in
+      let hi = 0xD800 lor (u' lsr 10) in
+      let lo = 0xDC00 lor (u' land 0x3FF) in
+      Buffer.add_char b (Char.unsafe_chr (hi land 0xFF));
+      Buffer.add_char b (Char.unsafe_chr (hi lsr 8));
+      Buffer.add_char b (Char.unsafe_chr (lo land 0xFF));
+      Buffer.add_char b (Char.unsafe_chr (lo lsr 8))
+  | _ -> assert false
+end
diff --git a/Makefile b/Makefile
index 9d2eda5..89ddd6f 100644
--- a/Makefile
+++ b/Makefile
@@ -131,7 +131,7 @@ COMPILER-$(OCAML_VERSION):
 
 test_ver = $(shell if [ $(OCAML_VERSION) -lt $(1) ] ; then echo lt ; fi)
 
-Compat.ml: COMPILER-$(OCAML_VERSION) $(if $(call test_ver,4050),Compat405.ml) $(if $(call test_ver,4030),Compat403.ml) $(if $(call test_ver,4020),Compat402.ml)
+Compat.ml: COMPILER-$(OCAML_VERSION) $(if $(call test_ver,4060),Compat406.ml) $(if $(call test_ver,4050),Compat405.ml) $(if $(call test_ver,4030),Compat403.ml) $(if $(call test_ver,4020),Compat402.ml)
 	cat $^ > $@
 
 flexlink.exe: $(OBJS) $(RES)

From f8281cc54388537217ef9d8e9360b970775a1add Mon Sep 17 00:00:00 2001
From: David Allsopp
Date: Sun, 22 Oct 2017 14:54:03 +0100
Subject: [PATCH 5/5] Add required Uchar functions to compatibility layer

Compat403.ml gains a Uchar module in which unsafe_of_int and to_int are
identity functions. The Compat*.ml prerequisites of Compat.ml are now
listed from Compat402.ml up to Compat406.ml, so this Uchar module is
concatenated ahead of Compat406.ml, which calls Uchar.to_int.
---
 Compat403.ml | 5 +++++
 Makefile     | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Compat403.ml b/Compat403.ml
index 8f2c88c..ebbf985 100644
--- a/Compat403.ml
+++ b/Compat403.ml
@@ -38,3 +38,8 @@ module String = struct
     (map Char.lowercase_ascii, map Char.uppercase_ascii)
 end
 
+module Uchar = struct
+  let unsafe_of_int c = c
+
+  let to_int c = c
+end
diff --git a/Makefile b/Makefile
index 89ddd6f..38db1cf 100644
--- a/Makefile
+++ b/Makefile
@@ -131,7 +131,7 @@ COMPILER-$(OCAML_VERSION):
 
 test_ver = $(shell if [ $(OCAML_VERSION) -lt $(1) ] ; then echo lt ; fi)
 
-Compat.ml: COMPILER-$(OCAML_VERSION) $(if $(call test_ver,4060),Compat406.ml) $(if $(call test_ver,4050),Compat405.ml) $(if $(call test_ver,4030),Compat403.ml) $(if $(call test_ver,4020),Compat402.ml)
+Compat.ml: COMPILER-$(OCAML_VERSION) $(if $(call test_ver,4020),Compat402.ml) $(if $(call test_ver,4030),Compat403.ml) $(if $(call test_ver,4050),Compat405.ml) $(if $(call test_ver,4060),Compat406.ml)
 	cat $^ > $@
 
 flexlink.exe: $(OBJS) $(RES)