From 5f985ca58c12a2db4b6e1a281e458d3701507305 Mon Sep 17 00:00:00 2001 From: ICHINOSE Shogo Date: Fri, 1 Mar 2024 23:20:22 +0900 Subject: [PATCH 1/5] optimize parsing ulid --- ulid.go | 149 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 75 insertions(+), 74 deletions(-) diff --git a/ulid.go b/ulid.go index 2064dd4..64ca7ee 100644 --- a/ulid.go +++ b/ulid.go @@ -175,35 +175,39 @@ func parse(v []byte, strict bool, id *ULID) error { return ErrDataSize } + // Use an optimized unrolled loop to decode a base32 ULID. + h := uint64(dec[v[0]])<<45 | + uint64(dec[v[1]])<<40 | + uint64(dec[v[2]])<<35 | + uint64(dec[v[3]])<<30 | + uint64(dec[v[4]])<<25 | + uint64(dec[v[5]])<<20 | + uint64(dec[v[6]])<<15 | + uint64(dec[v[7]])<<10 | + uint64(dec[v[8]])<<5 | + uint64(dec[v[9]]) + + m := uint64(dec[v[10]])<<35 | + uint64(dec[v[11]])<<30 | + uint64(dec[v[12]])<<25 | + uint64(dec[v[13]])<<20 | + uint64(dec[v[14]])<<15 | + uint64(dec[v[15]])<<10 | + uint64(dec[v[16]])<<5 | + uint64(dec[v[17]]) + + l := uint64(dec[v[18]])<<35 | + uint64(dec[v[19]])<<30 | + uint64(dec[v[20]])<<25 | + uint64(dec[v[21]])<<20 | + uint64(dec[v[22]])<<15 | + uint64(dec[v[23]])<<10 | + uint64(dec[v[24]])<<5 | + uint64(dec[v[25]]) + // Check if all the characters in a base32 encoded ULID are part of the // expected base32 character set. - if strict && - (dec[v[0]] == 0xFF || - dec[v[1]] == 0xFF || - dec[v[2]] == 0xFF || - dec[v[3]] == 0xFF || - dec[v[4]] == 0xFF || - dec[v[5]] == 0xFF || - dec[v[6]] == 0xFF || - dec[v[7]] == 0xFF || - dec[v[8]] == 0xFF || - dec[v[9]] == 0xFF || - dec[v[10]] == 0xFF || - dec[v[11]] == 0xFF || - dec[v[12]] == 0xFF || - dec[v[13]] == 0xFF || - dec[v[14]] == 0xFF || - dec[v[15]] == 0xFF || - dec[v[16]] == 0xFF || - dec[v[17]] == 0xFF || - dec[v[18]] == 0xFF || - dec[v[19]] == 0xFF || - dec[v[20]] == 0xFF || - dec[v[21]] == 0xFF || - dec[v[22]] == 0xFF || - dec[v[23]] == 0xFF || - dec[v[24]] == 0xFF || - dec[v[25]] == 0xFF) { + if strict && (h|m|l)&(1<<63) != 0 { return ErrInvalidCharacters } @@ -216,28 +220,25 @@ func parse(v []byte, strict bool, id *ULID) error { return ErrOverflow } - // Use an optimized unrolled loop (from https://github.com/RobThree/NUlid) - // to decode a base32 ULID. - // 6 bytes timestamp (48 bits) - (*id)[0] = (dec[v[0]] << 5) | dec[v[1]] - (*id)[1] = (dec[v[2]] << 3) | (dec[v[3]] >> 2) - (*id)[2] = (dec[v[3]] << 6) | (dec[v[4]] << 1) | (dec[v[5]] >> 4) - (*id)[3] = (dec[v[5]] << 4) | (dec[v[6]] >> 1) - (*id)[4] = (dec[v[6]] << 7) | (dec[v[7]] << 2) | (dec[v[8]] >> 3) - (*id)[5] = (dec[v[8]] << 5) | dec[v[9]] + (*id)[0] = byte(h >> 40) + (*id)[1] = byte(h >> 32) + (*id)[2] = byte(h >> 24) + (*id)[3] = byte(h >> 16) + (*id)[4] = byte(h >> 8) + (*id)[5] = byte(h) // 10 bytes of entropy (80 bits) - (*id)[6] = (dec[v[10]] << 3) | (dec[v[11]] >> 2) - (*id)[7] = (dec[v[11]] << 6) | (dec[v[12]] << 1) | (dec[v[13]] >> 4) - (*id)[8] = (dec[v[13]] << 4) | (dec[v[14]] >> 1) - (*id)[9] = (dec[v[14]] << 7) | (dec[v[15]] << 2) | (dec[v[16]] >> 3) - (*id)[10] = (dec[v[16]] << 5) | dec[v[17]] - (*id)[11] = (dec[v[18]] << 3) | dec[v[19]]>>2 - (*id)[12] = (dec[v[19]] << 6) | (dec[v[20]] << 1) | (dec[v[21]] >> 4) - (*id)[13] = (dec[v[21]] << 4) | (dec[v[22]] >> 1) - (*id)[14] = (dec[v[22]] << 7) | (dec[v[23]] << 2) | (dec[v[24]] >> 3) - (*id)[15] = (dec[v[24]] << 5) | dec[v[25]] + (*id)[6] = byte(m >> 32) + (*id)[7] = byte(m >> 24) + (*id)[8] = byte(m >> 16) + (*id)[9] = byte(m >> 8) + (*id)[10] = byte(m) + (*id)[11] = byte(l >> 32) + (*id)[12] = byte(l >> 24) + (*id)[13] = byte(l >> 16) + (*id)[14] = byte(l >> 8) + (*id)[15] = byte(l) return nil } @@ -360,34 +361,34 @@ func (id ULID) MarshalTextTo(dst []byte) error { } // Byte to index table for O(1) lookups when unmarshaling. -// We use 0xFF as sentinel value for invalid indexes. -var dec = [...]byte{ - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, - 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, - 0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, 0x15, 0xFF, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, 0x1D, 0x1E, - 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, - 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, - 0x15, 0xFF, 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, - 0x1D, 0x1E, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +// We use -1 (all bits are 1) as sentinel value for invalid indexes. +var dec = [...]int8{ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, -1, -1, + -1, -1, -1, -1, -1, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, + 0x0F, 0x10, 0x11, -1, 0x12, 0x13, -1, 0x14, 0x15, -1, + 0x16, 0x17, 0x18, 0x19, 0x1A, -1, 0x1B, 0x1C, 0x1D, 0x1E, + 0x1F, -1, -1, -1, -1, -1, -1, 0x0A, 0x0B, 0x0C, + 0x0D, 0x0E, 0x0F, 0x10, 0x11, -1, 0x12, 0x13, -1, 0x14, + 0x15, -1, 0x16, 0x17, 0x18, 0x19, 0x1A, -1, 0x1B, 0x1C, + 0x1D, 0x1E, 0x1F, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, } // EncodedSize is the length of a text encoded ULID. From a67312a95790d558ef3a74caf60d5d707dd73844 Mon Sep 17 00:00:00 2001 From: Peter Bourgon Date: Wed, 13 Mar 2024 14:50:27 +0100 Subject: [PATCH 2/5] s/-1/0xFF/ --- ulid.go | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/ulid.go b/ulid.go index 64ca7ee..d7b99d3 100644 --- a/ulid.go +++ b/ulid.go @@ -361,34 +361,34 @@ func (id ULID) MarshalTextTo(dst []byte) error { } // Byte to index table for O(1) lookups when unmarshaling. -// We use -1 (all bits are 1) as sentinel value for invalid indexes. +// We use 0xFF as sentinel value for invalid indexes. var dec = [...]int8{ - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 0x00, 0x01, - 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, -1, -1, - -1, -1, -1, -1, -1, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, - 0x0F, 0x10, 0x11, -1, 0x12, 0x13, -1, 0x14, 0x15, -1, - 0x16, 0x17, 0x18, 0x19, 0x1A, -1, 0x1B, 0x1C, 0x1D, 0x1E, - 0x1F, -1, -1, -1, -1, -1, -1, 0x0A, 0x0B, 0x0C, - 0x0D, 0x0E, 0x0F, 0x10, 0x11, -1, 0x12, 0x13, -1, 0x14, - 0x15, -1, 0x16, 0x17, 0x18, 0x19, 0x1A, -1, 0x1B, 0x1C, - 0x1D, 0x1E, 0x1F, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, + 0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, 0x15, 0xFF, + 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, 0x1D, 0x1E, + 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, + 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, + 0x15, 0xFF, 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, + 0x1D, 0x1E, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, } // EncodedSize is the length of a text encoded ULID. From 2ac659b7bdf5bfee26c9212195086f58254c9f72 Mon Sep 17 00:00:00 2001 From: Peter Bourgon Date: Wed, 13 Mar 2024 14:51:33 +0100 Subject: [PATCH 3/5] s/int8/byte/ --- ulid.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ulid.go b/ulid.go index d7b99d3..73eb6b8 100644 --- a/ulid.go +++ b/ulid.go @@ -362,7 +362,7 @@ func (id ULID) MarshalTextTo(dst []byte) error { // Byte to index table for O(1) lookups when unmarshaling. // We use 0xFF as sentinel value for invalid indexes. -var dec = [...]int8{ +var dec = [...]byte{ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, From 7414e91e2f912acb7f587aa3d561f775d997dc2a Mon Sep 17 00:00:00 2001 From: Peter Bourgon Date: Wed, 13 Mar 2024 15:03:27 +0100 Subject: [PATCH 4/5] Revert my changes --- ulid.go | 54 +++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/ulid.go b/ulid.go index 73eb6b8..40a0bec 100644 --- a/ulid.go +++ b/ulid.go @@ -362,33 +362,33 @@ func (id ULID) MarshalTextTo(dst []byte) error { // Byte to index table for O(1) lookups when unmarshaling. // We use 0xFF as sentinel value for invalid indexes. -var dec = [...]byte{ - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, - 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, - 0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, 0x15, 0xFF, - 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, 0x1D, 0x1E, - 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0A, 0x0B, 0x0C, - 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0xFF, 0x12, 0x13, 0xFF, 0x14, - 0x15, 0xFF, 0x16, 0x17, 0x18, 0x19, 0x1A, 0xFF, 0x1B, 0x1C, - 0x1D, 0x1E, 0x1F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, +var dec = [...]int8{ + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, -1, -1, + -1, -1, -1, -1, -1, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, + 0x0F, 0x10, 0x11, -1, 0x12, 0x13, -1, 0x14, 0x15, -1, + 0x16, 0x17, 0x18, 0x19, 0x1A, -1, 0x1B, 0x1C, 0x1D, 0x1E, + 0x1F, -1, -1, -1, -1, -1, -1, 0x0A, 0x0B, 0x0C, + 0x0D, 0x0E, 0x0F, 0x10, 0x11, -1, 0x12, 0x13, -1, 0x14, + 0x15, -1, 0x16, 0x17, 0x18, 0x19, 0x1A, -1, 0x1B, 0x1C, + 0x1D, 0x1E, 0x1F, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, } // EncodedSize is the length of a text encoded ULID. From 25fa2cbf9c316bf88ebb824aa98dc775406f1629 Mon Sep 17 00:00:00 2001 From: ICHINOSE Shogo Date: Sun, 7 Apr 2024 22:28:29 +0900 Subject: [PATCH 5/5] Add some comments about the reason for using -1. --- ulid.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/ulid.go b/ulid.go index 40a0bec..f6972f1 100644 --- a/ulid.go +++ b/ulid.go @@ -176,6 +176,23 @@ func parse(v []byte, strict bool, id *ULID) error { } // Use an optimized unrolled loop to decode a base32 ULID. + // The MSB(Most Significant Bit) is reserved for detecting invalid indexes. + // + // For example, in normal case, the bit layout of uint64(dec[v[0]])<<45 becomes: + // + // | 63 | 62 | 61 | 60 | 59 | 58 | 57 | 56 | 55 | 54 | 53 | 52 | 51 | 50 | 49 | 48 | 47 | 46 | 45 | 44 | ... | + // |----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|-----| + // | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | x | x | x | x | x | 0 | ... | + // + // and the MSB is set to 0. + // + // If the character is not part of the base32 character set, the layout becomes: + // + // | 63 | 62 | 61 | 60 | 59 | 58 | 57 | 56 | 55 | 54 | 53 | 52 | 51 | 50 | 49 | 48 | 47 | 46 | 45 | 44 | ... | + // |----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|----|-----| + // | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | ... | + // + // and the MSB is set to 1. h := uint64(dec[v[0]])<<45 | uint64(dec[v[1]])<<40 | uint64(dec[v[2]])<<35 | @@ -361,7 +378,9 @@ func (id ULID) MarshalTextTo(dst []byte) error { } // Byte to index table for O(1) lookups when unmarshaling. -// We use 0xFF as sentinel value for invalid indexes. +// We use -1 (all bits are set to 1) as sentinel value for invalid indexes. +// The reason for using -1 is that, even when cast, it does not lose the property that all bits are set to 1. +// e.g. uint64(int8(-1)) == 0xFFFFFFFFFFFFFFFF var dec = [...]int8{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,