From 8270b6502b8b46599dfad32ea128dedc50a585fa Mon Sep 17 00:00:00 2001 From: Mr Word Date: Thu, 21 Nov 2019 00:31:26 -0500 Subject: [PATCH 1/2] base32cx multibase rfc --- multibase.csv | 2 ++ rfcs/Base32CaseCheck.md | 63 +++++++++++++++++++++++++++++++++++++++++ tests/test1.csv | 2 ++ tests/test2.csv | 2 ++ tests/test3.csv | 2 ++ tests/test4.csv | 2 ++ tests/test5.csv | 2 ++ 7 files changed, 75 insertions(+) create mode 100644 rfcs/Base32CaseCheck.md diff --git a/multibase.csv b/multibase.csv index d980df7..7ad33f2 100644 --- a/multibase.csv +++ b/multibase.csv @@ -14,6 +14,8 @@ base32upper, B, rfc4648 no padding, base32pad, c, rfc4648 with padding, candidate base32padupper, C, rfc4648 with padding, candidate base32z, h, z-base-32 (used by Tahoe-LAFS), draft +base32cx, x, base32 over [4-9A-Z] with [a-z] letter-case checksum, draft +base32ux, X, base32cx using only uppercase (no checksum), draft base58flickr, Z, base58 flicker, candidate base58btc, z, base58 bitcoin, default base64, m, rfc4648 no padding, default diff --git a/rfcs/Base32CaseCheck.md b/rfcs/Base32CaseCheck.md new file mode 100644 index 0000000..3b8ab9d --- /dev/null +++ b/rfcs/Base32CaseCheck.md @@ -0,0 +1,63 @@ +### Intro + +`base32cx` is a base-32 encoding with letter-case checksums inspired by the success of Ethereum’s EIP55. + +It is designed for encoding relatively short byte strings presented as human-skimmable strings, like file hashes or cryptocurrency addresses. + +The alphabet maximizes the number of alphabetic characters to increase the average number of checksum bits per string. + +`base32cx` has a variant, `base32ux`, which is the same alphabet without a checksum. + +The unchecked variant `base32ux` has the property that a lexical sort of encoded data is a bitwise sort of decoded data, like `base32hex`. 
+ +### Alphabet + +``` + base32cx alphabet + + value: 0,1,2,3,4,5,[6..31] + encoding: 4,5,6,7,8,9,[A..Z] + lowered: 4,5,6,7,8,9,[a..z] + +value encoding value encoding value encoding value encoding +----- -------- ----- -------- ----- -------- ----- -------- + 0 4 8 C (c) 16 K (k) 24 S (s) + 1 5 9 D (d) 17 L (l) 25 T (t) + 2 6 10 E (e) 18 M (m) 26 U (u) + 3 7 11 F (f) 19 N (n) 27 V (v) + 4 8 12 G (g) 20 O (o) 28 W (w) + 5 9 13 H (h) 21 P (p) 29 X (x) + 6 A (a) 14 I (i) 22 Q (q) 30 Y (y) + 7 B (b) 15 J (j) 23 R (r) 31 Z (z) +``` + +### Checksum + +To checksum, take the sha256 of the data to be encoded. Call this hash `CHECK`. + +Encode the bytes using the alphabet above. (Hint: It's a drop-in replacement for `base32hex`). + +Lowercase the i'th character of the encoded string if the (i % 256)'th bit of `CHECK` is a 0. +Keep it uppercased if it is a 1. + +### Example + +``` +encode("Hello") + + encoding result note + + base32cx d5mQSv7j appears mixed / passes checksum + base32ux D5MQSV7J appears uniform / fails checksum + none d5mqsv7j appears mixed / fails checksum, uppercased might be base32ux +``` + +Uppercase letters are chosen for the unchecked variant because the numeric characters are “tall”. +This makes unchecked data appear uniform while checked data appears mixed-height. + +### Maximum size + +`base32cx` is only defined for byte sequences up to length 2^20 - 1, that is, one byte less than 1 MiB. +It is most likely not an appropriate choice of encoding for larger data. +For completeness, a standard method for hashing large data and applying the checksum in chunks will be specified in the future. +Until then, base32cx is simply not defined if the data to be encoded is longer than 2^20 - 1 bytes. 
diff --git a/tests/test1.csv b/tests/test1.csv index 44d6003..5690fd3 100644 --- a/tests/test1.csv +++ b/tests/test1.csv @@ -13,6 +13,8 @@ base32padupper, "CIRSWGZLOORZGC3DJPJSSAZLWMVZHS5DINFXGOIJB" base32hexpad, "t8him6pbeehp62r39f9ii0pbmclp7it38d5n6e891" base32hexpadupper, "T8HIM6PBEEHP62R39F9II0PBMCLP7IT38D5N6E891" base32z, "het1sg3mqqt3gn5djxj11y3msci3817depfzgqejb" +base32cx, "xcLMQaTFIiltA6v7dJDmM4TfqGPtbmx7CH9Raicd5" +base32ux, "XCLMQATFIILTA6V7DJDMM4TFQGPTBMX7CH9RAICD5" base58flickr, "Ztwe7gVTeK8wswS1gf8hrgAua9fcw9reboD" base58btc, "zUXE7GvtEk8XTXs1GF8HSGbVA9FCX9SEBPe" base64, "mRGVjZW50cmFsaXplIGV2ZXJ5dGhpbmchIQ" diff --git a/tests/test2.csv b/tests/test2.csv index 97ecccb..b44c252 100644 --- a/tests/test2.csv +++ b/tests/test2.csv @@ -13,6 +13,8 @@ base32padupper, "CPFSXGIDNMFXGSIBB" base32hexpad, "tf5in683dc5n6i811" base32hexpadupper, "TF5IN683DC5N6I811" base32z, "hxf1zgedpcfzg1ebb" +base32cx, "xj9MRAC7hg9ramc55" +base32ux, "XJ9MRAC7HG9RAMC55" base58flickr, "Z7Pznk19XTTzBtx" base58btc, "z7paNL19xttacUY" base64, "meWVzIG1hbmkgIQ" diff --git a/tests/test3.csv b/tests/test3.csv index 4bfbc5e..39e5f65 100644 --- a/tests/test3.csv +++ b/tests/test3.csv @@ -13,6 +13,8 @@ base32padupper, "CNBSWY3DPEB3W64TMMQ======" base32hexpad, "td1imor3f41rmusjccg======" base32hexpadupper, "TD1IMOR3F41RMUSJCCG======" base32z, "hpb1sa5dxrb5s6hucco" +base32cx, "xH5MQSv7J85vqYWnGgk" +base32ux, "XH5MQSV7J85VQYWNGGK" base58flickr, "ZrTu1dk6cWsRYjYu" base58btc, "zStV1DL6CwTryKyV" base64, "maGVsbG8gd29ybGQ" diff --git a/tests/test4.csv b/tests/test4.csv index e02f128..01e631d 100644 --- a/tests/test4.csv +++ b/tests/test4.csv @@ -13,6 +13,8 @@ base32padupper, "CAB4WK4ZANVQW42JAEE======" base32hexpad, "t01smasp0dlgmsq9044======" base32hexpadupper, "T01SMASP0DLGMSQ9044======" base32z, "hybhskh3ypiosh4jyrr" +base32cx, "x45WQewt4HPKqwud488" +base32ux, "X45WQEWT4HPKQWUD488" base58flickr, "Z17Pznk19XTTzBtx" base58btc, "z17paNL19xttacUY" base64, "mAHllcyBtYW5pICE" diff --git 
a/tests/test5.csv b/tests/test5.csv index 9f70104..25ae41d 100644 --- a/tests/test5.csv +++ b/tests/test5.csv @@ -13,6 +13,8 @@ base32padupper, "CAAAHSZLTEBWWC3TJEAQQ====" base32hexpad, "t0007ipbj41mm2rj940gg====" base32hexpadupper, "T0007IPBJ41MM2RJ940GG====" base32z, "hyyy813murbssn5ujryoo" +base32cx, "x444BmtFN85qQ6vnD84Kk" +base32ux, "X444BMTFN85QQ6VND84KK" base58flickr, "Z117Pznk19XTTzBtx" base58btc, "z117paNL19xttacUY" base64, "mAAB5ZXMgbWFuaSAh" From 0c379380ec19d601a936b06f982f4946bbb66631 Mon Sep 17 00:00:00 2001 From: Mr Word Date: Thu, 21 Nov 2019 00:34:59 -0500 Subject: [PATCH 2/2] add link to eip55 --- rfcs/Base32CaseCheck.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rfcs/Base32CaseCheck.md b/rfcs/Base32CaseCheck.md index 3b8ab9d..a22acf7 100644 --- a/rfcs/Base32CaseCheck.md +++ b/rfcs/Base32CaseCheck.md @@ -1,6 +1,6 @@ ### Intro -`base32cx` is a base-32 encoding with letter-case checksums inspired by the success of Ethereum’s EIP55. +`base32cx` is a base-32 encoding with letter-case checksums inspired by the success of Ethereum’s [EIP55](https://github.com/ethereum/EIPs/blob/master/EIPS/eip-55.md). It is designed for encoding relatively short byte strings presented as human-skimmable strings, like file hashes or cryptocurrency addresses. @@ -61,3 +61,7 @@ This makes unchecked data appear uniform while checked data appears mixed-height It is most likely not an appropriate choice of encoding for larger data. For completeness, a standard method for hashing large data and applying the checksum in chunks will be specified in the future. Until then, base32cx is simply not defined if the data to be encoded is longer than 2^20 - 1 bytes. + +### Source + +The spec was originally posted and is being maintained [here](https://word.site/2019/11/13/base32cx/).