From 59e0332ac27483225afece697b0d5574adea4d77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20P=C3=A9pin?= Date: Sat, 13 Feb 2021 22:13:56 +0100 Subject: [PATCH 1/3] Binary insertion sort --- src/lib/array/insertionSort.ml | 53 ++++++++++++++++++++++++++++++++++ tests/array/dune | 2 +- tests/array/insertionSort.ml | 19 ++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 src/lib/array/insertionSort.ml create mode 100644 tests/array/insertionSort.ml diff --git a/src/lib/array/insertionSort.ml b/src/lib/array/insertionSort.ml new file mode 100644 index 0000000..8aaae8e --- /dev/null +++ b/src/lib/array/insertionSort.ml @@ -0,0 +1,53 @@ +(** Binary insertion sort *) + +type 'a cmp = 'a -> 'a -> int + +let rec binary_search cmp t x lo hi = + (* assert a[lo: hi + 1] is sorted *) + assert (0 <= lo); + assert (lo <= hi + 1); (* lo should generally be <= hi except eventually in + the last recursive call where hi might be equal to + lo - 1. In this case lo is "correct" and hi is "out + of bounds" and must be replaced by hi + 1. *) + assert (hi < Array.length t); + (* ensures: (forall i < result. cmp a.(i) x <= 0) + /\ (forall i >= result. cmp a.(i) x > 0) *) + + let mid = lo + (hi - lo) / 2 in + assert (lo >= hi || mid < hi); + + let mid_le_x = cmp t.(mid) x <= 0 in + + if lo >= hi then + if mid_le_x then lo + 1 else lo + (* Or the "branching-free": *) + (* lo + (Obj.magic mid_le_x: int) *) + else if mid_le_x then + binary_search cmp t x (mid + 1) hi + else + binary_search cmp t x lo (mid - 1) + + +let sort cmp t lo hi = + assert (0 <= lo); + assert (lo <= hi); + assert (hi < Array.length t); + (* ensures: t[lo: hi + 1] is sorted *) + + for i = lo + 1 to hi do + (* invariant: t[lo: i] is sorted. *) + let x = t.(i) in + let pos = binary_search cmp t x lo (i - 1) in + Array.blit t pos t (pos + 1) (i - pos); + t.(pos) <- x + done + +let%test _ = + let t = [|4; 2; 6; 3|] in + sort Int.compare t 0 3; + t = [|2; 3; 4; 6|] + +let%test _ = + let t = [|9; 4; 8; 2; 1; 0|] in + sort Int.compare t 1 4; + t = [|9; 1; 2; 4; 8; 0|] diff --git a/tests/array/dune b/tests/array/dune index d84df11..13c9d7a 100644 --- a/tests/array/dune +++ b/tests/array/dune @@ -1,3 +1,3 @@ (tests - (names timsort) + (names timsort insertionSort) (libraries sorting_array genlib)) diff --git a/tests/array/insertionSort.ml b/tests/array/insertionSort.ml new file mode 100644 index 0000000..3be42e7 --- /dev/null +++ b/tests/array/insertionSort.ml @@ -0,0 +1,19 @@ +open Sorting_array +open Genlib + +let rec test gen ~nb ~len = + if nb <= 0 then () + else begin + let t = gen len in + let t' = Array.copy t in + Array.stable_sort Int.compare t'; + InsertionSort.sort Int.compare t 0 (len - 1); + assert (t = t'); + test gen ~nb:(nb - 1) ~len + end + +let () = + let nb = 100 in + let len = 1000 in + test Genarray.gen_unif ~nb ~len; + test (Genarray.gen_k_runs 5) ~nb ~len From 503177af3341fc4ea2a29e91b8b667c2b1c6c8e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20P=C3=A9pin?= Date: Sun, 14 Feb 2021 00:02:37 +0100 Subject: [PATCH 2/3] Rewrite InsertionSort.sort in a recursive fashion Also expose an auxiliary function which is going to be useful for TimSort and cie. --- src/lib/array/insertionSort.ml | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/src/lib/array/insertionSort.ml b/src/lib/array/insertionSort.ml index 8aaae8e..4c8f4c3 100644 --- a/src/lib/array/insertionSort.ml +++ b/src/lib/array/insertionSort.ml @@ -2,6 +2,9 @@ type 'a cmp = 'a -> 'a -> int + +(** Search where to insert x in the sorted slice t[lo: hi + 1]. + Performs O(log(hi - lo)) comparisons. *) let rec binary_search cmp t x lo hi = (* assert a[lo: hi + 1] is sorted *) assert (0 <= lo); @@ -28,19 +31,33 @@ let rec binary_search cmp t x lo hi = binary_search cmp t x lo (mid - 1) -let sort cmp t lo hi = +(** Sort the slice t[lo: hi + 1] assuming that t[lo: i] is already sorted. *) +let rec sort_from_i cmp t lo hi i = + (* variant: hi - i *) assert (0 <= lo); - assert (lo <= hi); + assert (lo < i); + assert (i <= hi + 1); assert (hi < Array.length t); - (* ensures: t[lo: hi + 1] is sorted *) - - for i = lo + 1 to hi do - (* invariant: t[lo: i] is sorted. *) + (* assert t[lo: i] is sorted *) + (* ensures t[lo: hi + 1] is sorted *) + if i > hi then () + else begin let x = t.(i) in let pos = binary_search cmp t x lo (i - 1) in Array.blit t pos t (pos + 1) (i - pos); - t.(pos) <- x - done + t.(pos) <- x; + sort_from_i cmp t lo hi (i + 1) + end + +let%test _ = + let t = [|9; 1; 4; 5; 3; 2; 0|] in + let i = 4 in + sort_from_i Int.compare t 1 5 i; + t = [|9; 1; 2; 3; 4; 5; 0|] + + +(** Sort the slice t[lo: hi + 1]. *) +let sort cmp t lo hi = sort_from_i cmp t lo hi (lo + 1) let%test _ = let t = [|4; 2; 6; 3|] in From b7f64cbe95d32b669bf27eb5abe7a38b9e91c9a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20P=C3=A9pin?= Date: Sun, 14 Feb 2021 00:29:50 +0100 Subject: [PATCH 3/3] Don't use Array.blit in the insertion sort!! [Array.blit t ofs t (ofs + 1) len] uses memmove under the hood which allocates a temporary array, this is terrible in terms of performance. --- src/lib/array/insertionSort.ml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/lib/array/insertionSort.ml b/src/lib/array/insertionSort.ml index 4c8f4c3..49ecd38 100644 --- a/src/lib/array/insertionSort.ml +++ b/src/lib/array/insertionSort.ml @@ -44,7 +44,9 @@ let rec sort_from_i cmp t lo hi i = else begin let x = t.(i) in let pos = binary_search cmp t x lo (i - 1) in - Array.blit t pos t (pos + 1) (i - pos); + for j = i downto pos + 1 do + t.(j) <- t.(j - 1) + done; t.(pos) <- x; sort_from_i cmp t lo hi (i + 1) end