Skip to content

Commit c21acda

Browse files
committed
common/bitutil: optimize TestBytes
1 parent a9eaf2f commit c21acda

File tree

5 files changed

+184
-25
lines changed

5 files changed

+184
-25
lines changed

common/bitutil/bitutil.go

Lines changed: 1 addition & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -120,31 +120,7 @@ func safeORBytes(dst, a, b []byte) int {
120120

121121
// TestBytes tests whether any bit is set in the input byte slice.
122122
func TestBytes(p []byte) bool {
123-
if supportsUnaligned {
124-
return fastTestBytes(p)
125-
}
126-
return safeTestBytes(p)
127-
}
128-
129-
// fastTestBytes tests for set bits in bulk. It only works on architectures that
130-
// support unaligned read/writes.
131-
func fastTestBytes(p []byte) bool {
132-
n := len(p)
133-
w := n / wordSize
134-
if w > 0 {
135-
pw := *(*[]uintptr)(unsafe.Pointer(&p))
136-
for i := 0; i < w; i++ {
137-
if pw[i] != 0 {
138-
return true
139-
}
140-
}
141-
}
142-
for i := n - n%wordSize; i < n; i++ {
143-
if p[i] != 0 {
144-
return true
145-
}
146-
}
147-
return false
123+
return testBytes(p)
148124
}
149125

150126
// safeTestBytes tests for set bits one byte at a time. It works on all

common/bitutil/test_amd64.s

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_amd64.s
5+
6+
//go:build !purego
7+
8+
#include "textflag.h"
9+
10+
// testBytesASM reports whether any of the n bytes starting at p is non-zero.
// Register use: SI = p, DX = number of bytes still to be checked, AX = forward
// offset used by the 16-byte loop. A length that is not a multiple of 16 is
// first trimmed from the end (one byte or eight bytes at a time) until the
// remainder is a multiple of 16; that remainder is then scanned forward from
// offset 0 in 16-byte steps. "aligned" below refers to the remaining length,
// not to the address.
//
// func testBytesASM(p *byte, n int) bool
11+
TEXT ·testBytesASM(SB), NOSPLIT, $0
12+
MOVQ p+0(FP), SI // SI = p
13+
MOVQ n+8(FP), DX // DX = n
14+
TESTQ DX, DX // if len is 0, return false
15+
JZ not_found
16+
TESTQ $15, DX // if len is not a multiple of 16, trim the tail first (not_aligned).
17+
JNZ not_aligned
18+
19+
aligned:
20+
MOVQ $0, AX // AX = forward byte offset into the slice; DX is a non-zero multiple of 16 here
21+
22+
PCALIGN $16
23+
loop16b:
24+
MOVOU (SI)(AX*1), X0 // Load 16 bytes from p+AX
25+
// NOTE(review): PTEST is an SSE4.1 instruction, which the GOAMD64=v1 baseline
// does not guarantee - confirm that a CPU feature check or build constraint
// protects this path on older amd64 CPUs.
PTEST X0, X0 // Test if all bits are zero (ZF=1 if all zero)
26+
JNZ found // If any bit is set (ZF=0), jump to found
27+
ADDQ $16, AX
28+
CMPQ DX, AX // keep going until the offset reaches the remaining length
29+
JNE loop16b
30+
JMP not_found
31+
32+
PCALIGN $16
33+
loop_1b:
34+
SUBQ $1, DX // Drop the last remaining byte from the length...
35+
MOVB (SI)(DX*1), DI // ...and load that byte (p[DX]).
36+
TESTB DI, DI // Test if byte is non-zero
37+
JNZ found
38+
TESTQ $7, DX // while the remaining len is not a multiple of 8, keep testing single bytes.
39+
JNZ loop_1b
40+
CMPQ DX, $0 // nothing left to check: return false.
41+
JE not_found
42+
TESTQ $15, DX // remaining len is a multiple of 16: switch to the 16-byte loop.
43+
JZ aligned
// Otherwise the remaining len is 8 mod 16: fall through to not_aligned.
44+
45+
not_aligned:
46+
TESTQ $7, DX // if the remaining len is not a multiple of 8, trim single bytes first.
47+
JNE loop_1b
48+
SUBQ $8, DX // Remaining len is 8 mod 16 here: drop the last 8 bytes from the length...
49+
MOVQ (SI)(DX*1), DI // ...and load them as one 64-bit word.
50+
TESTQ DI, DI // Test if 8 bytes are non-zero
51+
JNZ found
52+
CMPQ DX, $16 // remaining len is now a multiple of 16: either >= 16 (scan it) or 0 (done).
53+
JGE aligned
54+
JMP not_found
55+
56+
not_found:
57+
MOVB $0, ret+16(FP) // return false
58+
RET
59+
60+
found:
61+
MOVB $1, ret+16(FP) // return true
62+
RET
63+

common/bitutil/test_arm64.s

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_arm64.s
5+
6+
//go:build !purego
7+
8+
#include "textflag.h"
9+
10+
// testBytesASM reports whether any of the n bytes starting at p is non-zero.
// Register use: R0 = read cursor (starts at p and is advanced by the
// post-increment loads), R1 = number of bytes still to be checked. Full
// 64-byte chunks are handled in loop_64; the remaining 0..63 bytes are handled
// by testing the individual bits of R1 (32, 16, 8, 4, 2, 1).
//
// func testBytesASM(p *byte, n int) bool
11+
TEXT ·testBytesASM(SB), NOSPLIT|NOFRAME, $0
12+
MOVD p+0(FP), R0 // R0 = p
13+
MOVD n+8(FP), R1 // R1 = n
14+
CMP $64, R1 // fewer than 64 bytes: skip the bulk loop
15+
BLT tail
16+
loop_64:
17+
VLD1.P 64(R0), [V0.B16, V1.B16, V2.B16, V3.B16] // load 64 bytes, post-incrementing R0 by 64
18+
// OR all four vectors together; the result is non-zero iff any loaded byte is non-zero
19+
VORR V0.B16, V1.B16, V4.B16
20+
VORR V2.B16, V3.B16, V5.B16
21+
VORR V4.B16, V5.B16, V6.B16
22+
// Check if any byte in V6 is non-zero by moving both 64-bit halves to general registers
23+
VMOV V6.D[0], R2
24+
VMOV V6.D[1], R3
25+
ORR R2, R3, R2
26+
CBNZ R2, found
27+
SUBS $64, R1 // R1 -= 64 (the flags set here are overwritten by the CMP below)
28+
CMP $64, R1 // another full 64-byte chunk left?
29+
BGE loop_64
30+
tail:
31+
// Fewer than 64 bytes remain; return false right away if nothing is left.
32+
CBZ R1, not_found
33+
TBZ $5, R1, less_than32 // bit 5 clear: no 32-byte piece
34+
VLD1.P 32(R0), [V0.B16, V1.B16] // load 32 bytes, post-incrementing R0 by 32
35+
VORR V0.B16, V1.B16, V2.B16
36+
VMOV V2.D[0], R2
37+
VMOV V2.D[1], R3
38+
ORR R2, R3, R2
39+
CBNZ R2, found
40+
less_than32:
41+
TBZ $4, R1, less_than16 // bit 4 clear: no 16-byte piece
42+
LDP.P 16(R0), (R11, R12) // load two 64-bit words, post-incrementing R0 by 16
43+
ORR R11, R12, R2
44+
CBNZ R2, found
45+
less_than16:
46+
TBZ $3, R1, less_than8 // bit 3 clear: no 8-byte piece
47+
MOVD.P 8(R0), R11 // load 8 bytes, post-incrementing R0 by 8
48+
CBNZ R11, found
49+
less_than8:
50+
TBZ $2, R1, less_than4 // bit 2 clear: no 4-byte piece
51+
MOVWU.P 4(R0), R11 // load 4 bytes (zero-extended), post-incrementing R0 by 4
52+
CBNZ R11, found
53+
less_than4:
54+
TBZ $1, R1, less_than2 // bit 1 clear: no 2-byte piece
55+
MOVHU.P 2(R0), R11 // load 2 bytes (zero-extended), post-incrementing R0 by 2
56+
CBNZ R11, found
57+
less_than2:
58+
TBZ $0, R1, not_found // bit 0 clear: no final byte, so nothing was set
59+
MOVBU (R0), R11 // load the last byte
60+
CBNZ R11, found
// Fall through: every byte checked was zero.
61+
not_found:
62+
MOVD $0, R0
63+
MOVB R0, ret+16(FP) // return false
64+
RET
65+
found:
66+
MOVD $1, R0
67+
MOVB R0, ret+16(FP) // return true
68+
RET

common/bitutil/test_asm.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
// inspired by: https://github.com/golang/go/blob/4a3cef2036097d323b6cc0bbe90fc4d8c7588660/src/crypto/internal/fips140/subtle/xor_asm.go
5+
6+
//go:build (amd64 || arm64) && !purego
7+
8+
package bitutil
9+
10+
func testBytes(p []byte) bool {
11+
return testBytesASM(&p[0], len(p))
12+
}
13+
14+
// testBytesASM reports whether any of the n bytes starting at p is non-zero.
// It has no Go body; the implementations are the ·testBytesASM routines in
// test_amd64.s and test_arm64.s.
//
//go:noescape
15+
func testBytesASM(p *byte, n int) bool

common/bitutil/test_generic.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright 2025 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
//go:build (!amd64 && !arm64) || purego
6+
7+
package bitutil
8+
9+
import "unsafe"
10+
11+
func testBytes(p []byte) bool {
12+
if supportsUnaligned {
13+
return fastTestBytes(p)
14+
}
15+
return safeTestBytes(p)
16+
}
17+
18+
// fastTestBytes tests for set bits in bulk. It only works on architectures that
19+
// support unaligned read/writes.
20+
func fastTestBytes(p []byte) bool {
21+
n := len(p)
22+
w := n / wordSize
23+
if w > 0 {
24+
pw := *(*[]uintptr)(unsafe.Pointer(&p))
25+
for i := 0; i < w; i++ {
26+
if pw[i] != 0 {
27+
return true
28+
}
29+
}
30+
}
31+
for i := n - n%wordSize; i < n; i++ {
32+
if p[i] != 0 {
33+
return true
34+
}
35+
}
36+
return false
37+
}

0 commit comments

Comments
 (0)