diff --git a/benches/base62.rs b/benches/base62.rs
index 19ad1a5..bd9f271 100644
--- a/benches/base62.rs
+++ b/benches/base62.rs
@@ -97,7 +97,14 @@ pub fn criterion_benchmark(c: &mut Criterion) {
     });
 
     group.bench_function("standard_buf_fixed", |b| {
-        b.iter(|| encode_buf(black_box(u128::MAX), black_box(&mut String::new())))
+        b.iter_batched_ref(
+            || String::with_capacity(22),
+            |buf| {
+                buf.clear();
+                encode_buf(black_box(u128::MAX), black_box(buf))
+            },
+            BatchSize::SmallInput,
+        )
     });
 
     group.bench_function("standard_buf_random", |b| {
@@ -176,7 +183,14 @@ pub fn criterion_benchmark(c: &mut Criterion) {
     });
 
     group.bench_function("alternative_buf_fixed", |b| {
-        b.iter(|| encode_alternative_buf(black_box(u128::MAX), black_box(&mut String::new())))
+        b.iter_batched_ref(
+            || String::with_capacity(22),
+            |buf| {
+                buf.clear();
+                encode_alternative_buf(black_box(u128::MAX), black_box(buf))
+            },
+            BatchSize::SmallInput,
+        )
     });
 
     group.bench_function("alternative_buf_random", |b| {
diff --git a/src/lib.rs b/src/lib.rs
index 0dd4306..483c92c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -22,7 +22,7 @@ extern crate alloc;
 #[cfg(feature = "std")]
 extern crate std;
 
-use core::{convert::TryInto, fmt};
+use core::{convert::TryInto, fmt, num::NonZeroU128};
 
 const BASE: u64 = 62;
 const BASE_TO_2: u64 = BASE * BASE;
@@ -51,15 +51,13 @@ const BASE_TO_21: u128 = BASE_TO_20 * BASE as u128;
 const DIV_BASE_TO_10_MULTIPLY: u128 = 233718071534448225491982379416108680074;
 const DIV_BASE_TO_10_SHIFT: u8 = 59;
 
-#[repr(align(64))]
 struct StandardTables {
-    encode: [u8; 62],
+    encode_pairs: [[u8; 2]; BASE_TO_2 as usize],
     decode: [u8; 128],
 }
 
-#[repr(align(64))]
 struct AlternativeTables {
-    encode: [u8; 62],
+    encode_pairs: [[u8; 2]; BASE_TO_2 as usize],
     decode: [u8; 128],
 }
 
@@ -74,6 +72,16 @@ impl StandardTables {
             b'u', b'v', b'w', b'x', b'y', b'z',
         ];
 
+        // Generate pair table: index i represents (i / 62, i % 62)
+        let mut encode_pairs = [[0u8; 2]; BASE_TO_2 as usize];
+        let mut i = 0usize;
+        while i < BASE_TO_2 as usize {
+            let hi = i / 62;
+            let lo = i % 62;
+            encode_pairs[i] = [ENCODE[hi], ENCODE[lo]];
+            i += 1;
+        }
+
         let mut decode = [255u8; 128];
 
         // Populate decode table
@@ -94,7 +102,7 @@ impl StandardTables {
         }
 
         Self {
-            encode: ENCODE,
+            encode_pairs,
             decode,
         }
     }
@@ -111,6 +119,16 @@ impl AlternativeTables {
             b'U', b'V', b'W', b'X', b'Y', b'Z',
         ];
 
+        // Generate pair table: index i represents (i / 62, i % 62)
+        let mut encode_pairs = [[0u8; 2]; BASE_TO_2 as usize];
+        let mut i = 0usize;
+        while i < BASE_TO_2 as usize {
+            let hi = i / 62;
+            let lo = i % 62;
+            encode_pairs[i] = [ENCODE[hi], ENCODE[lo]];
+            i += 1;
+        }
+
         let mut decode = [255u8; 128];
 
         // Populate decode table
@@ -131,7 +149,7 @@ impl AlternativeTables {
         }
 
         Self {
-            encode: ENCODE,
+            encode_pairs,
             decode,
         }
     }
@@ -323,35 +341,47 @@ pub fn encode_alternative_io<T: Into<u128>, W: std::io::Write + ?Sized>(
 
 // Internal functions used by both no_std and alloc features
 pub(crate) fn digit_count(n: u128) -> usize {
-    const POWERS: [u128; 22] = [
+    const THRESHOLDS: [u128; 23] = [
         0,
-        BASE as u128,
-        BASE_TO_2 as u128,
-        BASE_TO_3 as u128,
-        BASE_TO_4 as u128,
-        BASE_TO_5 as u128,
-        BASE_TO_6 as u128,
-        BASE_TO_7 as u128,
-        BASE_TO_8 as u128,
-        BASE_TO_9 as u128,
-        BASE_TO_10,
-        BASE_TO_11,
-        BASE_TO_12,
-        BASE_TO_13,
-        BASE_TO_14,
-        BASE_TO_15,
-        BASE_TO_16,
-        BASE_TO_17,
-        BASE_TO_18,
-        BASE_TO_19,
-        BASE_TO_20,
-        BASE_TO_21,
+        BASE as u128 - 1,
+        BASE_TO_2 as u128 - 1,
+        BASE_TO_3 as u128 - 1,
+        BASE_TO_4 as u128 - 1,
+        BASE_TO_5 as u128 - 1,
+        BASE_TO_6 as u128 - 1,
+        BASE_TO_7 as u128 - 1,
+        BASE_TO_8 as u128 - 1,
+        BASE_TO_9 as u128 - 1,
+        BASE_TO_10 - 1,
+        BASE_TO_11 - 1,
+        BASE_TO_12 - 1,
+        BASE_TO_13 - 1,
+        BASE_TO_14 - 1,
+        BASE_TO_15 - 1,
+        BASE_TO_16 - 1,
+        BASE_TO_17 - 1,
+        BASE_TO_18 - 1,
+        BASE_TO_19 - 1,
+        BASE_TO_20 - 1,
+        BASE_TO_21 - 1,
+        u128::MAX, // sentinel, u128 cannot be larger than this value
     ];
 
-    match POWERS.binary_search(&n) {
-        Ok(n) => n.wrapping_add(1),
-        Err(n) => n,
-    }
+    let Some(n) = NonZeroU128::new(n) else {
+        return 1;
+    };
+    // We want to find floor(log62(n)) + 1 = floor(log2(n) / log2(62)) + 1
+    // First, approximate log2(n) with ilog2 = floor(log2(n)), underestimating by 0 <= err < 1
+    let ilog2 = n.ilog2() as usize;
+
+    // Next, we find floor(ilog2/log2(62)), which is exactly equal to floor(ilog2 * 43/256) for all ilog2 in [0, 127]
+    // The result is an underestimate by up to 1, purely because ilog2 is an underestimate
+    let estimate = ((ilog2 * 43) >> 8) + 1;
+
+    // SAFETY: estimate is in [1,22] since ilog2 is in [0,127] and (127*43)>>8 + 1 = 22
+    let threshold = unsafe { *THRESHOLDS.get_unchecked(estimate) };
+    let bump = (n.get() > threshold) as usize;
+    estimate + bump
 }
 
 #[inline(always)]
@@ -571,22 +601,27 @@ pub fn decode_alternative<T: AsRef<[u8]>>(input: T) -> Result<u128, DecodeError>
 }
 
 // Common encoding function
-unsafe fn encode_impl(num: u128, digits: usize, buf: &mut [u8], encode_table: &[u8; 62]) -> usize {
+unsafe fn encode_impl(
+    num: u128,
+    digits: usize,
+    buf: &mut [u8],
+    encode_pairs: &[[u8; 2]; BASE_TO_2 as usize],
+) -> usize {
     unsafe {
         if let Ok(num) = TryInto::<u64>::try_into(num) {
-            encode_impl_u64(num, digits, buf, encode_table)
+            encode_impl_u64(num, digits, buf, encode_pairs)
         } else if digits > 20 {
-            encode_impl_over_20_digits(num, digits, buf, encode_table)
+            encode_impl_over_20_digits(num, digits, buf, encode_pairs)
         } else if digits == 20 {
             //  (AAAAAAAAAA, BBBBBBBBBB)
             let (first_u64, second_u64) = div_base_to_10(num);
             // AAAAAAAAAA
             let first_u64 = first_u64 as u64;
 
-            encode_impl_20_digits(first_u64, second_u64, buf, encode_table)
+            encode_impl_20_digits(first_u64, second_u64, buf, encode_pairs)
         } else {
             // digits between 11 and 20 (10 digits would always fit into a u64, which we checked first)
-            encode_impl_over_10_under_20_digits(num, digits, buf, encode_table)
+            encode_impl_over_10_under_20_digits(num, digits, buf, encode_pairs)
         }
     }
 }
@@ -596,29 +631,26 @@ unsafe fn encode_impl_over_20_digits(
     num: u128,
     digits: usize,
     buf: &mut [u8],
-    encode_table: &[u8; 62],
+    encode_pairs: &[[u8; 2]; BASE_TO_2 as usize],
 ) -> usize {
-    // input: AABBBBBBBBBBCCCCCCCCCC
+    // input: [A]BCCCCCCCCCCDDDDDDDDDD
     //
-    //  (AABBBBBBBBBB, CCCCCCCCCC)
+    //  ([A]BCCCCCCCCCC, DDDDDDDDDD)
     let (num, third_u64) = div_base_to_10(num);
-    //  (AA, BBBBBBBBBB)
+    //  ([A]B, CCCCCCCCCC)
     let (first_u64, second_u64) = div_base_to_10(num);
-    //   AA - no more than two digits as num was 22 digits
+    //   [A]B - no more than two digits as num was 22 digits
     let first_u64 = first_u64 as u64;
 
-    // encode the first one or two digits
-    if digits == 21 {
-        unsafe {
-            *buf.get_unchecked_mut(0) = *encode_table.get_unchecked(first_u64 as usize);
-        }
-    } else {
-        let second_digit = first_u64 % BASE;
-        let first_digit = first_u64 / BASE;
-        unsafe {
-            *buf.get_unchecked_mut(1) = *encode_table.get_unchecked(second_digit as usize);
-            *buf.get_unchecked_mut(0) = *encode_table.get_unchecked(first_digit as usize);
-        }
+    // Branchless 21/22 digit handling of [A]B
+    // For 21 digits: write 0 then overwrite with B at position 0
+    // For 22 digits: write A at position 0, B at position 1
+    unsafe {
+        // [A, B] in 22 digit case or [0, B] in 21 digit case
+        let [c1, c2] = *encode_pairs.get_unchecked(first_u64 as usize);
+        let is_22 = digits - 21; // 0 or 1
+        *buf.get_unchecked_mut(0) = c1;
+        *buf.get_unchecked_mut(is_22) = c2;
     }
 
     // encode the last 20 digits
@@ -627,7 +659,7 @@ unsafe fn encode_impl_over_20_digits(
             second_u64,
             third_u64,
             &mut buf[(digits - 20)..],
-            encode_table,
+            encode_pairs,
         );
     }
 
@@ -639,32 +671,47 @@ unsafe fn encode_impl_20_digits(
     first_u64: u64,
     second_u64: u64,
     buf: &mut [u8],
-    encode_table: &[u8; 62],
+    encode_pairs: &[[u8; 2]; BASE_TO_2 as usize],
 ) -> usize {
     let first_u32 = (first_u64 / BASE_TO_5) as u32;
     let second_u32 = (first_u64 % BASE_TO_5) as u32;
     let third_u32 = (second_u64 / BASE_TO_5) as u32;
     let fourth_u32 = (second_u64 % BASE_TO_5) as u32;
 
-    // [AAAAA, BBBBB, CCCCC, DDDDD]
+    // [ABCDE, FGHIJ, KLMNO, PQRST]
     let mut nums = [first_u32, second_u32, third_u32, fourth_u32];
-    const STARTING_WRITE_IDXS: [usize; 4] = [5, 10, 15, 20];
-
-    for i in 0..5 {
-        nums.iter_mut()
-            .zip(STARTING_WRITE_IDXS)
-            .for_each(|(num, starting_write_idx)| {
-                let quotient = num.wrapping_div(BASE as u32);
-                let remainder = (*num - (BASE as u32) * quotient) as usize;
-                *num = quotient;
-
-                unsafe {
-                    *buf.get_unchecked_mut(starting_write_idx - i - 1) =
-                        *encode_table.get_unchecked(remainder)
-                }
-            });
+    const BASE_POSITIONS: [usize; 4] = [0, 5, 10, 15];
+
+    for pair_idx in 0..2 {
+        nums.iter_mut().zip(BASE_POSITIONS).for_each(|(num, base)| {
+            // pair_idx 0: ABC, FGH, KLM, PQR
+            // pair_idx 1: A, F, K, P
+            let quotient = *num / BASE_TO_2 as u32;
+            // pair_idx 0: DE, IJ, NO, ST
+            // pair_idx 1: BC, GH, LM, QR
+            let pair = (*num - BASE_TO_2 as u32 * quotient) as usize;
+            *num = quotient;
+
+            // Write positions: base+3,base+4 for pair_idx=0; base+1,base+2 for pair_idx=1
+            let pos = base + 4 - 2 * pair_idx;
+            unsafe {
+                // pair_idx 0: [D, E], [I, J], [N, O], [S, T]
+                // pair_idx 1: [B, C], [G, H], [L, M], [Q, R]
+                let [c1, c2] = *encode_pairs.get_unchecked(pair);
+                *buf.get_unchecked_mut(pos - 1) = c1;
+                *buf.get_unchecked_mut(pos) = c2;
+            }
+        });
     }
 
+    // A, F, K, P
+    nums.iter()
+        .zip(BASE_POSITIONS)
+        .for_each(|(num, base)| unsafe {
+            // num is now a single digit (0-61), use first byte of pair table
+            *buf.get_unchecked_mut(base) = encode_pairs.get_unchecked(*num as usize)[1];
+        });
+
     20
 }
 
@@ -673,7 +720,7 @@ unsafe fn encode_impl_over_10_under_20_digits(
     num: u128,
     digits: usize,
     buf: &mut [u8],
-    encode_table: &[u8; 62],
+    encode_pairs: &[[u8; 2]; BASE_TO_2 as usize],
 ) -> usize {
     let mut write_idx = digits;
     let mut digit_index = 0_usize;
@@ -689,7 +736,7 @@ unsafe fn encode_impl_over_10_under_20_digits(
         num /= BASE;
 
         unsafe {
-            *buf.get_unchecked_mut(write_idx) = *encode_table.get_unchecked(remainder as usize);
+            *buf.get_unchecked_mut(write_idx) = encode_pairs.get_unchecked(remainder as usize)[1];
         }
 
         digit_index = digit_index.wrapping_add(1);
@@ -706,26 +753,25 @@ unsafe fn encode_impl_u64(
     num: u64,
     digits: usize,
     buf: &mut [u8],
-    encode_table: &[u8; 62],
+    encode_pairs: &[[u8; 2]; BASE_TO_2 as usize],
 ) -> usize {
-    if digits == 11 {
-        // ABBBBBBBBBB
-
-        // A
-        let first_u64 = num / (BASE_TO_10 as u64);
-        // BBBBBBBBBB
-        let second_u64 = num % (BASE_TO_10 as u64);
+    if digits >= 10 {
+        // Branchless 10/11 digit handling
+        // For 10 digits:  BBBBBBBBBB -> first_digit=0, remainder=BBBBBBBBBB, offset=0 (buf[0] gets overwritten)
+        // For 11 digits: ABBBBBBBBBB -> first_digit=A, remainder=BBBBBBBBBB, offset=1
+        let first_digit = num / (BASE_TO_10 as u64);
+        let remainder = num % (BASE_TO_10 as u64);
+        let offset = digits - 10; // 0 or 1
 
         unsafe {
-            *buf.get_unchecked_mut(0) = *encode_table.get_unchecked(first_u64 as usize);
+            // This is unnecessary work for the 10 digit case, but its very cheap work and allows us to avoid a branch
+            *buf.get_unchecked_mut(0) = encode_pairs.get_unchecked(first_digit as usize)[1];
 
-            encode_impl_u64_10_digits(second_u64, &mut buf[1..], encode_table);
+            encode_impl_u64_10_digits(remainder, &mut buf[offset..], encode_pairs);
         }
         digits
-    } else if digits == 10 {
-        unsafe { encode_impl_u64_10_digits(num, buf, encode_table) }
     } else {
-        unsafe { encode_impl_u64_under_10_digits(num, digits, buf, encode_table) }
+        unsafe { encode_impl_u64_under_10_digits(num, digits, buf, encode_pairs) }
     }
 }
 
@@ -733,7 +779,7 @@ unsafe fn encode_impl_u64_under_10_digits(
     mut num: u64,
     digits: usize,
     buf: &mut [u8],
-    encode_table: &[u8; 62],
+    encode_pairs: &[[u8; 2]; BASE_TO_2 as usize],
 ) -> usize {
     let mut write_idx = digits;
     let mut digit_index = 0_usize;
@@ -745,7 +791,7 @@ unsafe fn encode_impl_u64_under_10_digits(
         num /= BASE;
 
         unsafe {
-            *buf.get_unchecked_mut(write_idx) = *encode_table.get_unchecked(remainder as usize);
+            *buf.get_unchecked_mut(write_idx) = encode_pairs.get_unchecked(remainder as usize)[1];
         }
 
         digit_index = digit_index.wrapping_add(1);
@@ -754,29 +800,48 @@ unsafe fn encode_impl_u64_under_10_digits(
     digits
 }
 
-unsafe fn encode_impl_u64_10_digits(num: u64, buf: &mut [u8], encode_table: &[u8; 62]) -> usize {
+unsafe fn encode_impl_u64_10_digits(
+    num: u64,
+    buf: &mut [u8],
+    encode_pairs: &[[u8; 2]; BASE_TO_2 as usize],
+) -> usize {
     let first_u32 = (num / BASE_TO_5) as u32;
     let second_u32 = (num % BASE_TO_5) as u32;
 
-    // [AAAAA, BBBBB]
+    // [ABCDE, FGHIJ]
     let mut nums = [first_u32, second_u32];
-    const STARTING_WRITE_IDXS: [usize; 2] = [5, 10];
-
-    for i in 0..5 {
-        nums.iter_mut()
-            .zip(STARTING_WRITE_IDXS)
-            .for_each(|(num, starting_write_idx)| {
-                let quotient = num.wrapping_div(BASE as u32);
-                let remainder = (*num - (BASE as u32) * quotient) as usize;
-                *num = quotient;
-
-                unsafe {
-                    *buf.get_unchecked_mut(starting_write_idx - i - 1) =
-                        *encode_table.get_unchecked(remainder)
-                }
-            });
+    const BASE_POSITIONS: [usize; 2] = [0, 5];
+
+    for pair_idx in 0..2 {
+        nums.iter_mut().zip(BASE_POSITIONS).for_each(|(num, base)| {
+            // pair_idx 0: ABC, FGH
+            // pair_idx 1: A, F
+            let quotient = *num / BASE_TO_2 as u32;
+            // pair_idx 0: DE, IJ
+            // pair_idx 1: BC, GH
+            let pair = (*num - BASE_TO_2 as u32 * quotient) as usize;
+            *num = quotient;
+
+            // Write positions: base+3,base+4 for pair_idx=0; base+1,base+2 for pair_idx=1
+            let pos = base + 4 - 2 * pair_idx;
+            unsafe {
+                // pair_idx 0: [D, E], [I, J]
+                // pair_idx 1: [B, C], [G, H]
+                let [c1, c2] = *encode_pairs.get_unchecked(pair);
+                *buf.get_unchecked_mut(pos - 1) = c1;
+                *buf.get_unchecked_mut(pos) = c2;
+            }
+        });
     }
 
+    // A, F
+    nums.iter()
+        .zip(BASE_POSITIONS)
+        .for_each(|(num, base)| unsafe {
+            // num is now a single digit (0-61), use second byte of pair entry (the low digit)
+            *buf.get_unchecked_mut(base) = encode_pairs.get_unchecked(*num as usize)[1];
+        });
+
     10
 }
 
@@ -808,11 +873,11 @@ const fn mulh(x: u128, y: u128) -> u128 {
 }
 
 unsafe fn _encode_buf(num: u128, digits: usize, buf: &mut [u8]) -> usize {
-    unsafe { encode_impl(num, digits, buf, &STANDARD_TABLES.encode) }
+    unsafe { encode_impl(num, digits, buf, &STANDARD_TABLES.encode_pairs) }
 }
 
 unsafe fn _encode_alternative_buf(num: u128, digits: usize, buf: &mut [u8]) -> usize {
-    unsafe { encode_impl(num, digits, buf, &ALTERNATIVE_TABLES.encode) }
+    unsafe { encode_impl(num, digits, buf, &ALTERNATIVE_TABLES.encode_pairs) }
 }
 
 #[cfg(feature = "alloc")]