From 2398d13ffb96fec4271646755fead8ec2eaf56e8 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 11 Dec 2023 16:35:57 +0100 Subject: [PATCH 1/5] device: do atomic 64-bit add outside of vector loop Only bother updating the rxBytes counter once we've processed a whole vector, since additions are atomic. cherry picked from commit WireGuard/wireguard-go@542e565baa776ed4c5c55b73ef9aa38d33d55197 Updates tailscale/corp#28879 Signed-off-by: Jason A. Donenfeld --- device/receive.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/device/receive.go b/device/receive.go index af2db44e8..01804b7f3 100644 --- a/device/receive.go +++ b/device/receive.go @@ -447,6 +447,7 @@ func (peer *Peer) RoutineSequentialReceiver(maxBatchSize int) { elemsContainer.Lock() validTailPacket := -1 dataPacketReceived := false + rxBytesLen := uint64(0) for i, elem := range elemsContainer.elems { if elem.packet == nil { // decryption failed @@ -463,7 +464,7 @@ func (peer *Peer) RoutineSequentialReceiver(maxBatchSize int) { peer.timersHandshakeComplete() peer.SendStagedPackets() } - peer.rxBytes.Add(uint64(len(elem.packet) + MinMessageSize)) + rxBytesLen += uint64(len(elem.packet) + MinMessageSize) if len(elem.packet) == 0 { device.log.Verbosef("%v - Receiving keepalive packet", peer) @@ -512,6 +513,8 @@ func (peer *Peer) RoutineSequentialReceiver(maxBatchSize int) { bufs = append(bufs, elem.buffer[:MessageTransportOffsetContent+len(elem.packet)]) } + + peer.rxBytes.Add(rxBytesLen) if validTailPacket >= 0 { peer.SetEndpointFromPacket(elemsContainer.elems[validTailPacket].endpoint) peer.keepKeyFreshReceiving() From 831ddd5ef5d48c4fa489bff23c58d86afd116d85 Mon Sep 17 00:00:00 2001 From: Martin Basovnik Date: Fri, 10 Nov 2023 11:10:12 +0100 Subject: [PATCH 2/5] device: fix possible deadlock in close method There is a possible deadlock in `device.Close()` when you try to close the device very soon after its start. The problem is that two different methods acquire the same locks in different order: 1. device.Close() - device.ipcMutex.Lock() - device.state.Lock() 2. device.changeState(deviceState) - device.state.Lock() - device.ipcMutex.Lock() Reproducer: func TestDevice_deadlock(t *testing.T) { d := randDevice(t) d.Close() } Problem: $ go clean -testcache && go test -race -timeout 3s -run TestDevice_deadlock ./device | grep -A 10 sync.runtime_SemacquireMutex sync.runtime_SemacquireMutex(0xc000117d20?, 0x94?, 0x0?) /usr/local/opt/go/libexec/src/runtime/sema.go:77 +0x25 sync.(*Mutex).lockSlow(0xc000130518) /usr/local/opt/go/libexec/src/sync/mutex.go:171 +0x213 sync.(*Mutex).Lock(0xc000130518) /usr/local/opt/go/libexec/src/sync/mutex.go:90 +0x55 golang.zx2c4.com/wireguard/device.(*Device).Close(0xc000130500) /Users/martin.basovnik/git/basovnik/wireguard-go/device/device.go:373 +0xb6 golang.zx2c4.com/wireguard/device.TestDevice_deadlock(0x0?) /Users/martin.basovnik/git/basovnik/wireguard-go/device/device_test.go:480 +0x2c testing.tRunner(0xc00014c000, 0x131d7b0) -- sync.runtime_SemacquireMutex(0xc000130564?, 0x60?, 0xc000130548?) /usr/local/opt/go/libexec/src/runtime/sema.go:77 +0x25 sync.(*Mutex).lockSlow(0xc000130750) /usr/local/opt/go/libexec/src/sync/mutex.go:171 +0x213 sync.(*Mutex).Lock(0xc000130750) /usr/local/opt/go/libexec/src/sync/mutex.go:90 +0x55 sync.(*RWMutex).Lock(0xc000130750) /usr/local/opt/go/libexec/src/sync/rwmutex.go:147 +0x45 golang.zx2c4.com/wireguard/device.(*Device).upLocked(0xc000130500) /Users/martin.basovnik/git/basovnik/wireguard-go/device/device.go:179 +0x72 golang.zx2c4.com/wireguard/device.(*Device).changeState(0xc000130500, 0x1) cherry picked from commit WireGuard/wireguard-go@12269c2761734b15625017d8565745096325392f Updates tailscale/corp#28879 Signed-off-by: Martin Basovnik Signed-off-by: Jason A. Donenfeld --- device/device.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/device/device.go b/device/device.go index 86dff0d7e..5b2348564 100644 --- a/device/device.go +++ b/device/device.go @@ -368,10 +368,10 @@ func (device *Device) RemoveAllPeers() { } func (device *Device) Close() { - device.ipcMutex.Lock() - defer device.ipcMutex.Unlock() device.state.Lock() defer device.state.Unlock() + device.ipcMutex.Lock() + defer device.ipcMutex.Unlock() if device.isClosed() { return } From 13d2acfa7f5c863a34318aef86cc287b1c3fdc39 Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Thu, 26 Dec 2024 20:36:53 +0100 Subject: [PATCH 3/5] device: reduce RoutineHandshake allocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce allocations by eliminating byte reader, hand-rolled decoding and reusing message structs. Synthetic benchmark: var msgSink MessageInitiation func BenchmarkMessageInitiationUnmarshal(b *testing.B) { packet := make([]byte, MessageInitiationSize) reader := bytes.NewReader(packet) err := binary.Read(reader, binary.LittleEndian, &msgSink) if err != nil { b.Fatal(err) } b.Run("binary.Read", func(b *testing.B) { b.ReportAllocs() for range b.N { reader := bytes.NewReader(packet) _ = binary.Read(reader, binary.LittleEndian, &msgSink) } }) b.Run("unmarshal", func(b *testing.B) { b.ReportAllocs() for range b.N { _ = msgSink.unmarshal(packet) } }) } Results: │ - │ │ sec/op │ MessageInitiationUnmarshal/binary.Read-8 1.508µ ± 2% MessageInitiationUnmarshal/unmarshal-8 12.66n ± 2% │ - │ │ B/op │ MessageInitiationUnmarshal/binary.Read-8 208.0 ± 0% MessageInitiationUnmarshal/unmarshal-8 0.000 ± 0% │ - │ │ allocs/op │ MessageInitiationUnmarshal/binary.Read-8 2.000 ± 0% MessageInitiationUnmarshal/unmarshal-8 0.000 ± 0% cherry picked from commit WireGuard/wireguard-go@9e7529c3d2d0c54f4d5384c01645a9279e4740ae Updates tailscale/corp#28879 Signed-off-by: Alexander Yastrebov Signed-off-by: Jason A. Donenfeld --- device/noise-protocol.go | 48 ++++++++++++++++++++++++++++++++++++++++ device/receive.go | 10 +++------ 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/device/noise-protocol.go b/device/noise-protocol.go index 2d8f98426..2e7e9ae33 100644 --- a/device/noise-protocol.go +++ b/device/noise-protocol.go @@ -6,6 +6,7 @@ package device import ( + "encoding/binary" "errors" "fmt" "sync" @@ -115,6 +116,53 @@ type MessageCookieReply struct { Cookie [blake2s.Size128 + poly1305.TagSize]byte } +var errMessageTooShort = errors.New("message too short") + +func (msg *MessageInitiation) unmarshal(b []byte) error { + if len(b) < MessageInitiationSize { + return errMessageTooShort + } + + msg.Type = binary.LittleEndian.Uint32(b) + msg.Sender = binary.LittleEndian.Uint32(b[4:]) + copy(msg.Ephemeral[:], b[8:]) + copy(msg.Static[:], b[8+len(msg.Ephemeral):]) + copy(msg.Timestamp[:], b[8+len(msg.Ephemeral)+len(msg.Static):]) + copy(msg.MAC1[:], b[8+len(msg.Ephemeral)+len(msg.Static)+len(msg.Timestamp):]) + copy(msg.MAC2[:], b[8+len(msg.Ephemeral)+len(msg.Static)+len(msg.Timestamp)+len(msg.MAC1):]) + + return nil +} + +func (msg *MessageResponse) unmarshal(b []byte) error { + if len(b) < MessageResponseSize { + return errMessageTooShort + } + + msg.Type = binary.LittleEndian.Uint32(b) + msg.Sender = binary.LittleEndian.Uint32(b[4:]) + msg.Receiver = binary.LittleEndian.Uint32(b[8:]) + copy(msg.Ephemeral[:], b[12:]) + copy(msg.Empty[:], b[12+len(msg.Ephemeral):]) + copy(msg.MAC1[:], b[12+len(msg.Ephemeral)+len(msg.Empty):]) + copy(msg.MAC2[:], b[12+len(msg.Ephemeral)+len(msg.Empty)+len(msg.MAC1):]) + + return nil +} + +func (msg *MessageCookieReply) unmarshal(b []byte) error { + if len(b) < MessageCookieReplySize { + return errMessageTooShort + } + + msg.Type = binary.LittleEndian.Uint32(b) + msg.Receiver = binary.LittleEndian.Uint32(b[4:]) + copy(msg.Nonce[:], b[8:]) + copy(msg.Cookie[:], b[8+len(msg.Nonce):]) + + return nil +} + type Handshake struct { state handshakeState mutex sync.RWMutex diff --git a/device/receive.go b/device/receive.go index 01804b7f3..bc37f915e 100644 --- a/device/receive.go +++ b/device/receive.go @@ -6,7 +6,6 @@ package device import ( - "bytes" "encoding/binary" "errors" "net" @@ -287,8 +286,7 @@ func (device *Device) RoutineHandshake(id int) { // unmarshal packet var reply MessageCookieReply - reader := bytes.NewReader(elem.packet) - err := binary.Read(reader, binary.LittleEndian, &reply) + err := reply.unmarshal(elem.packet) if err != nil { device.log.Verbosef("Failed to decode cookie reply") goto skip @@ -353,8 +351,7 @@ func (device *Device) RoutineHandshake(id int) { // unmarshal var msg MessageInitiation - reader := bytes.NewReader(elem.packet) - err := binary.Read(reader, binary.LittleEndian, &msg) + err := msg.unmarshal(elem.packet) if err != nil { device.log.Errorf("Failed to decode initiation message") goto skip @@ -386,8 +383,7 @@ func (device *Device) RoutineHandshake(id int) { // unmarshal var msg MessageResponse - reader := bytes.NewReader(elem.packet) - err := binary.Read(reader, binary.LittleEndian, &msg) + err := msg.unmarshal(elem.packet) if err != nil { device.log.Errorf("Failed to decode response message") goto skip From 6374889662b854d8aea8c3e03fcea802e3dac673 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 15 May 2025 16:48:14 +0200 Subject: [PATCH 4/5] device: make unmarshall length checks exact This is already enforced in receive.go, but if these unmarshallers are to have error return values anyway, make them as explicit as possible. cherry picked from commit WireGuard/wireguard-go@842888ac5c93ccc5ee6344eceaadf783fcf1e243 Updates tailscale/corp#28879 Signed-off-by: Jason A. Donenfeld --- device/noise-protocol.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/device/noise-protocol.go b/device/noise-protocol.go index 2e7e9ae33..1e99f6886 100644 --- a/device/noise-protocol.go +++ b/device/noise-protocol.go @@ -116,11 +116,11 @@ type MessageCookieReply struct { Cookie [blake2s.Size128 + poly1305.TagSize]byte } -var errMessageTooShort = errors.New("message too short") +var errMessageLengthMismatch = errors.New("message length mismatch") func (msg *MessageInitiation) unmarshal(b []byte) error { - if len(b) < MessageInitiationSize { - return errMessageTooShort + if len(b) != MessageInitiationSize { + return errMessageLengthMismatch } msg.Type = binary.LittleEndian.Uint32(b) @@ -135,8 +135,8 @@ func (msg *MessageInitiation) unmarshal(b []byte) error { } func (msg *MessageResponse) unmarshal(b []byte) error { - if len(b) < MessageResponseSize { - return errMessageTooShort + if len(b) != MessageResponseSize { + return errMessageLengthMismatch } msg.Type = binary.LittleEndian.Uint32(b) @@ -151,8 +151,8 @@ func (msg *MessageResponse) unmarshal(b []byte) error { } func (msg *MessageCookieReply) unmarshal(b []byte) error { - if len(b) < MessageCookieReplySize { - return errMessageTooShort + if len(b) != MessageCookieReplySize { + return errMessageLengthMismatch } msg.Type = binary.LittleEndian.Uint32(b) From c7e640dcb72b0a0b199812d69c6f05885dcac904 Mon Sep 17 00:00:00 2001 From: Alexander Yastrebov Date: Sat, 17 May 2025 11:34:30 +0200 Subject: [PATCH 5/5] device: optimize message encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimize message encoding by eliminating binary.Write (which internally uses reflection) in favour of hand-rolled encoding. This is companion to 9e7529c3d2d0c54f4d5384c01645a9279e4740ae. Synthetic benchmark: var packetSink []byte func BenchmarkMessageInitiationMarshal(b *testing.B) { var msg MessageInitiation b.Run("binary.Write", func(b *testing.B) { b.ReportAllocs() for range b.N { var buf [MessageInitiationSize]byte writer := bytes.NewBuffer(buf[:0]) _ = binary.Write(writer, binary.LittleEndian, msg) packetSink = writer.Bytes() } }) b.Run("binary.Encode", func(b *testing.B) { b.ReportAllocs() for range b.N { packet := make([]byte, MessageInitiationSize) _, _ = binary.Encode(packet, binary.LittleEndian, msg) packetSink = packet } }) b.Run("marshal", func(b *testing.B) { b.ReportAllocs() for range b.N { packet := make([]byte, MessageInitiationSize) _ = msg.marshal(packet) packetSink = packet } }) } Results: │ - │ │ sec/op │ MessageInitiationMarshal/binary.Write-8 1.337µ ± 0% MessageInitiationMarshal/binary.Encode-8 1.242µ ± 0% MessageInitiationMarshal/marshal-8 53.05n ± 1% │ - │ │ B/op │ MessageInitiationMarshal/binary.Write-8 368.0 ± 0% MessageInitiationMarshal/binary.Encode-8 160.0 ± 0% MessageInitiationMarshal/marshal-8 160.0 ± 0% │ - │ │ allocs/op │ MessageInitiationMarshal/binary.Write-8 3.000 ± 0% MessageInitiationMarshal/binary.Encode-8 1.000 ± 0% MessageInitiationMarshal/marshal-8 1.000 ± 0% cherry picked from commit WireGuard/wireguard-go@264889f0bbdf9250bb8389a637dd5f38389bfe0b Updates tailscale/corp#28879 Signed-off-by: Alexander Yastrebov Signed-off-by: Jason A. Donenfeld --- device/noise-protocol.go | 45 ++++++++++++++++++++++++++++++++++++++++ device/send.go | 21 +++++++------------ 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/device/noise-protocol.go b/device/noise-protocol.go index 1e99f6886..cb4dedb11 100644 --- a/device/noise-protocol.go +++ b/device/noise-protocol.go @@ -134,6 +134,22 @@ func (msg *MessageInitiation) unmarshal(b []byte) error { return nil } +func (msg *MessageInitiation) marshal(b []byte) error { + if len(b) != MessageInitiationSize { + return errMessageLengthMismatch + } + + binary.LittleEndian.PutUint32(b, msg.Type) + binary.LittleEndian.PutUint32(b[4:], msg.Sender) + copy(b[8:], msg.Ephemeral[:]) + copy(b[8+len(msg.Ephemeral):], msg.Static[:]) + copy(b[8+len(msg.Ephemeral)+len(msg.Static):], msg.Timestamp[:]) + copy(b[8+len(msg.Ephemeral)+len(msg.Static)+len(msg.Timestamp):], msg.MAC1[:]) + copy(b[8+len(msg.Ephemeral)+len(msg.Static)+len(msg.Timestamp)+len(msg.MAC1):], msg.MAC2[:]) + + return nil +} + func (msg *MessageResponse) unmarshal(b []byte) error { if len(b) != MessageResponseSize { return errMessageLengthMismatch @@ -150,6 +166,22 @@ func (msg *MessageResponse) unmarshal(b []byte) error { return nil } +func (msg *MessageResponse) marshal(b []byte) error { + if len(b) != MessageResponseSize { + return errMessageLengthMismatch + } + + binary.LittleEndian.PutUint32(b, msg.Type) + binary.LittleEndian.PutUint32(b[4:], msg.Sender) + binary.LittleEndian.PutUint32(b[8:], msg.Receiver) + copy(b[12:], msg.Ephemeral[:]) + copy(b[12+len(msg.Ephemeral):], msg.Empty[:]) + copy(b[12+len(msg.Ephemeral)+len(msg.Empty):], msg.MAC1[:]) + copy(b[12+len(msg.Ephemeral)+len(msg.Empty)+len(msg.MAC1):], msg.MAC2[:]) + + return nil +} + func (msg *MessageCookieReply) unmarshal(b []byte) error { if len(b) != MessageCookieReplySize { return errMessageLengthMismatch @@ -163,6 +195,19 @@ func (msg *MessageCookieReply) unmarshal(b []byte) error { return nil } +func (msg *MessageCookieReply) marshal(b []byte) error { + if len(b) != MessageCookieReplySize { + return errMessageLengthMismatch + } + + binary.LittleEndian.PutUint32(b, msg.Type) + binary.LittleEndian.PutUint32(b[4:], msg.Receiver) + copy(b[8:], msg.Nonce[:]) + copy(b[8+len(msg.Nonce):], msg.Cookie[:]) + + return nil +} + type Handshake struct { state handshakeState mutex sync.RWMutex diff --git a/device/send.go b/device/send.go index 8ed2e5f6c..7900f577f 100644 --- a/device/send.go +++ b/device/send.go @@ -6,7 +6,6 @@ package device import ( - "bytes" "encoding/binary" "errors" "net" @@ -124,10 +123,8 @@ func (peer *Peer) SendHandshakeInitiation(isRetry bool) error { return err } - var buf [MessageInitiationSize]byte - writer := bytes.NewBuffer(buf[:0]) - binary.Write(writer, binary.LittleEndian, msg) - packet := writer.Bytes() + packet := make([]byte, MessageInitiationSize) + _ = msg.marshal(packet) peer.cookieGenerator.AddMacs(packet) peer.timersAnyAuthenticatedPacketTraversal() @@ -155,10 +152,8 @@ func (peer *Peer) SendHandshakeResponse() error { return err } - var buf [MessageResponseSize]byte - writer := bytes.NewBuffer(buf[:0]) - binary.Write(writer, binary.LittleEndian, response) - packet := writer.Bytes() + packet := make([]byte, MessageResponseSize) + _ = response.marshal(packet) peer.cookieGenerator.AddMacs(packet) err = peer.BeginSymmetricSession() @@ -189,11 +184,11 @@ func (device *Device) SendHandshakeCookie(initiatingElem *QueueHandshakeElement) return err } - var buf [MessageCookieReplySize]byte - writer := bytes.NewBuffer(buf[:0]) - binary.Write(writer, binary.LittleEndian, reply) + packet := make([]byte, MessageCookieReplySize) + _ = reply.marshal(packet) // TODO: allocation could be avoided - device.net.bind.Send([][]byte{writer.Bytes()}, initiatingElem.endpoint) + device.net.bind.Send([][]byte{packet}, initiatingElem.endpoint) + return nil }