From f5f3bb585941ee4a4f3954c2c871f9ac11983cf3 Mon Sep 17 00:00:00 2001 From: Albin Kerouanton Date: Sun, 21 Dec 2025 10:21:54 +0100 Subject: [PATCH] vm networking: add flag vnet_hdr When segmentation offload is enabled, and unsegmented packets are sent to a VM (i.e. when running a container in the root netns), the kernel will detect that packets are larger than expected and proceed. That's not the case for containers (i.e. when running a container with its own netns, and a veth pair). In that case, packets reach the virtio-net interface, are forwarded to the bridge, and then to the appropriate veth. Unsegmented packets with GSO fields unset are dropped by the kernel either at the bridge or at the veth level. That may be due to the current network topology where the vnet interface is attached to a bridge. In that case, we need to tell libkrun that the network backend sends / receives virtio_net_hdr structs with the packets, and the backend needs to preserve GSO fields for VM-to-VM connections, or populate them for host-to-VM connections. Signed-off-by: Albin Kerouanton --- docs/vm-networking.md | 2 ++ internal/shim/task/networking_unix.go | 18 +++++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/docs/vm-networking.md b/docs/vm-networking.md index c2abdc5..1eb7e2b 100644 --- a/docs/vm-networking.md +++ b/docs/vm-networking.md @@ -49,6 +49,8 @@ that take the following fields: VFKIT magic sequence after connecting to the `socket`. Accept any of `1, t, T, TRUE, true, True, 0, f, F, FALSE, false, False`. Any other value is invalid and will produce an error. +- `vnet_hdr` (optional, defaults to false): Indicate whether the VMM includes + virtio-net headers along with Ethernet frames. Note that the first network specified will be used as the default gateway. 
diff --git a/internal/shim/task/networking_unix.go b/internal/shim/task/networking_unix.go index e894837..b85b9b2 100644 --- a/internal/shim/task/networking_unix.go +++ b/internal/shim/task/networking_unix.go @@ -30,6 +30,11 @@ import ( "github.com/containerd/nerdbox/internal/vm" ) +const ( + NET_FLAG_VFKIT = 1 << iota // See https://github.com/containers/libkrun/blob/357ec63fee444b973e4fc76d2121fd41631f121e/include/libkrun.h#L271C9-L271C23 + NET_FLAG_INCLUDE_VNET_HEADER +) + type networksProvider struct { nws []network } @@ -43,6 +48,7 @@ type network struct { addr6 netip.Prefix // addr6 is the IPv6 address + subnet mask of the network interface features uint32 // features is a bitmask of virtio-net features enabled on this network endpoint vfkit bool // vfkit is a boolean flag indicating whether libkrun must send the VFKIT magic sequence after connecting to the socket. + vnetHdr bool // vnetHdr is a boolean flag indicating whether libkrun must include virtio-net headers along with Ethernet frames. } const ( @@ -57,6 +63,7 @@ const ( addrField = "addr" featuresField = "features" // features is a bitwise-OR separated list of virtio-net features. See https://docs.oasis-open.org/virtio/virtio/v1.3/csd01/virtio-v1.3-csd01.html#x1-2370003 vfkitField = "vfkit" // vfkit is a boolean flag indicating whether libkrun must send the VFKIT magic sequence after connecting to the socket. 
+ vnetHdrField = "vnet_hdr" nwModeUnixgram = "unixgram" nwModeUnixstream = "unixstream" @@ -149,6 +156,12 @@ func parseNetwork(annotation string) (network, error) { return network{}, fmt.Errorf("parsing vfkit field: %w", err) } n.vfkit = vfkit + case vnetHdrField: + vnetHdr, err := strconv.ParseBool(value) + if err != nil { + return network{}, fmt.Errorf("parsing vnet_hdr field: %w", err) + } + n.vnetHdr = vnetHdr default: return network{}, fmt.Errorf("unknown network field: %s", key) } @@ -180,7 +193,10 @@ func (p *networksProvider) SetupVM(ctx context.Context, vmi vm.Instance) error { var flags uint32 if nw.vfkit { - flags = 1 // See https://github.com/containers/libkrun/blob/357ec63fee444b973e4fc76d2121fd41631f121e/include/libkrun.h#L271C9-L271C23 + flags = NET_FLAG_VFKIT + } + if nw.vnetHdr { + flags |= NET_FLAG_INCLUDE_VNET_HEADER } if err := vmi.AddNIC(ctx, nw.endpoint, nw.mac, nwMode, nw.features, flags); err != nil {