From 4c66a9118eb99d9c4a162cdfe8540219d4388f44 Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Thu, 13 Nov 2025 21:42:52 -0500 Subject: [PATCH 01/13] add mFIB entries for host IPv6 multicast traffic Signed-off-by: Aritra Basu --- vpp-manager/vpp_runner.go | 53 +++++++++++++++++++++++++++- vpplink/routes.go | 72 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index d9fd9f90e..41df393db 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -355,6 +355,48 @@ func (v *VppRunner) allocateStaticVRFs() error { return nil } +// setupIPv6MulticastForHostTap configures mFIB entries to allow IPv6 multicast traffic +// from the Linux host to pass through VPP. This is required for DHCPv6, NDP, and other +// IPv6 protocols that use link-local multicast. +// Without this configuration, packets arriving from the tap interface fail RPF checks +// because the tap interface is not in the mFIB accept list. 
+func (v *VppRunner) setupIPv6MulticastForHostTap(vrfID uint32, tapSwIfIndex uint32, uplinkSwIfIndex uint32) error { + log.Infof("Setting up IPv6 multicast forwarding for host tap in VRF %d", vrfID) + + // IPv6 multicast groups that need to be forwarded from the Linux host + multicastGroups := []struct { + addr string + comment string + }{ + {"ff02::1:2", "DHCPv6 All Relay Agents and Servers (REQUIRED for DHCPv6)"}, + {"ff02::1", "All Nodes (for NDP)"}, + {"ff02::2", "All Routers (for NDP/RA)"}, + } + + for _, group := range multicastGroups { + groupIP := net.ParseIP(group.addr) + if groupIP == nil { + log.Warnf("Invalid multicast address: %s", group.addr) + continue + } + + groupNet := &net.IPNet{ + IP: groupIP, + Mask: net.CIDRMask(128, 128), // /128 - specific group + } + + err := v.vpp.MRouteAddForHostMulticast(vrfID, groupNet, tapSwIfIndex, uplinkSwIfIndex) + if err != nil { + return errors.Wrapf(err, "cannot add mFIB route for %s (%s) in VRF %d", + group.addr, group.comment, vrfID) + } + + log.Infof("Added mFIB route for %s (%s) in VRF %d", group.addr, group.comment, vrfID) + } + + return nil +} + // Configure specific VRFs for a given tap to the host to handle broadcast / multicast traffic sent by the host func (v *VppRunner) setupTapVRF(ifSpec *config.UplinkInterfaceSpec, ifState *config.LinuxInterfaceState, tapSwIfIndex uint32) (vrfs []uint32, err error) { for _, ipFamily := range vpplink.IPFamilies { @@ -379,7 +421,16 @@ func (v *VppRunner) setupTapVRF(ifSpec *config.UplinkInterfaceSpec, ifState *con if err != nil { log.Errorf("cannot add broadcast route in vpp: %v", err) } - } // else {} No custom routes for IPv6 for now. Forward LL multicast from the host? 
+ } else { + // Setup IPv6 multicast forwarding for the host + // This is required for DHCPv6 solicitations, NDP, and other link-local multicast + // Unlike IPv4, we cannot use a unicast route trick because ff02::/16 is multicast + // and must go through mFIB with proper RPF configuration + err = v.setupIPv6MulticastForHostTap(vrfID, tapSwIfIndex, ifSpec.SwIfIndex) + if err != nil { + return []uint32{}, errors.Wrap(err, "Error setting up IPv6 multicast forwarding") + } + } // default route in default table err = v.vpp.AddDefaultRouteViaTable(vrfID, config.Info.PhysicalNets[ifSpec.PhysicalNetworkName].VrfID, ipFamily.IsIP6) diff --git a/vpplink/routes.go b/vpplink/routes.go index ac11f2259..b8ffb62e4 100644 --- a/vpplink/routes.go +++ b/vpplink/routes.go @@ -202,6 +202,78 @@ func (v *VppLink) addDelIPMRoute(route *types.Route, flags mfib_types.MfibEntryF return nil } +// MRouteAddForHostMulticast adds an mFIB route with explicit interface flags for each path +// This is needed for forwarding multicast traffic like DHCPv6 solicitations from the host +// For DHCPv6 from Linux host via tap: +// - tapSwIfIndex should have ACCEPT flag (allow packets from tap) +// - uplinkSwIfIndex should have ACCEPT|FORWARD flags (forward to uplink, accept replies) +func (v *VppLink) MRouteAddForHostMulticast(tableID uint32, group *net.IPNet, tapSwIfIndex, uplinkSwIfIndex uint32) error { + client := vppip.NewServiceClient(v.GetConnection()) + + isIP6 := group.IP.To4() == nil + ones, _ := group.Mask.Size() + prefix := ip_types.Mprefix{ + Af: types.ToVppAddressFamily(isIP6), + GrpAddressLength: uint16(ones), + GrpAddress: types.ToVppAddress(group.IP).Un, + // SrcAddress is all zeros for (*,G) entries + } + + // Create mFIB paths with explicit interface flags + paths := []mfib_types.MfibPath{ + { + // Uplink interface: Accept + Forward + // Accept incoming multicast from network, forward outgoing multicast to network + ItfFlags: mfib_types.MFIB_API_ITF_FLAG_ACCEPT | 
mfib_types.MFIB_API_ITF_FLAG_FORWARD, + Path: fib_types.FibPath{ + SwIfIndex: uplinkSwIfIndex, + TableID: 0, + RpfID: 0, + Weight: 1, + Preference: 0, + Type: fib_types.FIB_API_PATH_TYPE_NORMAL, + Flags: fib_types.FIB_API_PATH_FLAG_NONE, + Proto: types.IsV6toFibProto(isIP6), + }, + }, + { + // Tap interface: Accept only + // This allows packets FROM Linux host to pass RPF check + ItfFlags: mfib_types.MFIB_API_ITF_FLAG_ACCEPT, + Path: fib_types.FibPath{ + SwIfIndex: tapSwIfIndex, + TableID: 0, + RpfID: 0, + Weight: 1, + Preference: 0, + Type: fib_types.FIB_API_PATH_TYPE_NORMAL, + Flags: fib_types.FIB_API_PATH_FLAG_NONE, + Proto: types.IsV6toFibProto(isIP6), + }, + }, + } + + vppRoute := vppip.IPMroute{ + TableID: tableID, + Prefix: prefix, + EntryFlags: mfib_types.MFIB_API_ENTRY_FLAG_NONE, // Use interface-based RPF, not ACCEPT_ALL_ITF + Paths: paths, + RpfID: 0, // No RPF-ID, use interface-based checking + } + + _, err := client.IPMrouteAddDel(v.GetContext(), &vppip.IPMrouteAddDel{ + IsAdd: true, + Route: vppRoute, + }) + if err != nil { + return fmt.Errorf("failed to add mroute for host multicast %s in table %d: %w", group.String(), tableID, err) + } + + v.GetLog().Infof("Added mFIB route for host multicast %s in table %d (tap=%d, uplink=%d)", + group.String(), tableID, tapSwIfIndex, uplinkSwIfIndex) + return nil +} + func (v *VppLink) addDelDefaultMRouteViaTable(srcTable, dstTable uint32, isIP6 bool, isAdd bool) error { route := &types.Route{ Paths: []types.RoutePath{{ From bd95db54848cc95746ae9eda875f4df20ee38a75 Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Wed, 19 Nov 2025 18:41:34 -0500 Subject: [PATCH 02/13] fix incorrect prefix length for IPv6 neighbors Signed-off-by: Aritra Basu --- calico-vpp-agent/routing/bgp_watcher.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/calico-vpp-agent/routing/bgp_watcher.go b/calico-vpp-agent/routing/bgp_watcher.go index 56e69103f..7055f5e13 100644 --- a/calico-vpp-agent/routing/bgp_watcher.go 
+++ b/calico-vpp-agent/routing/bgp_watcher.go @@ -543,10 +543,14 @@ func (s *Server) WatchBGPPath(t *tomb.Tomb) error { peer := localPeer.Peer filters := localPeer.BGPFilterNames // create a neighbor set to apply filter only on specific peer using a global policy + prefixLen := "/32" + if ip := net.ParseIP(peer.Conf.NeighborAddress); ip != nil && ip.To4() == nil { + prefixLen = "/128" + } neighborSet := &bgpapi.DefinedSet{ Name: peer.Conf.NeighborAddress + "neighbor", DefinedType: bgpapi.DefinedType_NEIGHBOR, - List: []string{peer.Conf.NeighborAddress + "/32"}, + List: []string{peer.Conf.NeighborAddress + prefixLen}, } err := s.BGPServer.AddDefinedSet(context.Background(), &bgpapi.AddDefinedSetRequest{ DefinedSet: neighborSet, From 892b6ae22e049d0934e791c56edcc1ef2e81595b Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Mon, 17 Nov 2025 21:29:27 -0500 Subject: [PATCH 03/13] fix for bind failure on BGP global IPv6 address Signed-off-by: Aritra Basu --- calico-vpp-agent/routing/routing_server.go | 38 ++++++++++++++++++---- config/config.go | 14 ++++++-- config/config_parse.go | 11 +++++++ 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/calico-vpp-agent/routing/routing_server.go b/calico-vpp-agent/routing/routing_server.go index 046e1a288..51b4fc0c6 100644 --- a/calico-vpp-agent/routing/routing_server.go +++ b/calico-vpp-agent/routing/routing_server.go @@ -31,6 +31,7 @@ import ( "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watchers" + "github.com/projectcalico/vpp-dataplane/v3/config" "github.com/projectcalico/vpp-dataplane/v3/vpplink" ) @@ -114,17 +115,28 @@ func (s *Server) ServeRouting(t *tomb.Tomb) (err error) { } for t.Alive() { - globalConfig, err := s.getGoBGPGlobalConfig() + nodeIP4, nodeIP6 := common.GetBGPSpecAddresses(s.nodeBGPSpec) + globalConfig, err := s.getGoBGPGlobalConfig(*config.BGPServerMode) if err != nil { return fmt.Errorf("cannot get global 
configuration: %v", err) } err = s.BGPServer.StartBgp(context.Background(), &bgpapi.StartBgpRequest{Global: globalConfig}) - if err != nil { + if err != nil && *config.BGPServerMode == config.BGPServerModeDualStack && nodeIP4 != nil { + s.log.Warnf("Failed to start BGP server in dualStack mode: %v. Retrying with IPv4-only listener", err) + globalConfig, err = s.getGoBGPGlobalConfig(config.BGPServerModeV4Only) + if err != nil { + return errors.Wrap(err, "cannot get IPv4-only BGP configuration for fallback") + } + err = s.BGPServer.StartBgp(context.Background(), &bgpapi.StartBgpRequest{Global: globalConfig}) + if err != nil { + return errors.Wrap(err, "failed to start BGP server after IPv4-only fallback") + } + s.log.Warn("BGP server started in degraded IPv4-only mode because IPv6 listener failed") + } else if err != nil { return errors.Wrap(err, "failed to start BGP server") } - nodeIP4, nodeIP6 := common.GetBGPSpecAddresses(s.nodeBGPSpec) if nodeIP4 != nil { err = s.initialPolicySetting(false /* isv6 */) if err != nil { @@ -176,7 +188,7 @@ func (s *Server) getLogSeverityScreen() string { return s.BGPConf.LogSeverityScreen } -func (s *Server) getGoBGPGlobalConfig() (*bgpapi.Global, error) { +func (s *Server) getGoBGPGlobalConfig(mode config.BGPServerModeType) (*bgpapi.Global, error) { var routerID string listenAddresses := make([]string, 0) asn := s.nodeBGPSpec.ASNumber @@ -185,11 +197,25 @@ func (s *Server) getGoBGPGlobalConfig() (*bgpapi.Global, error) { } nodeIP4, nodeIP6 := common.GetBGPSpecAddresses(s.nodeBGPSpec) - if nodeIP6 != nil { + useIP4 := nodeIP4 != nil + useIP6 := nodeIP6 != nil + + switch mode { + case config.BGPServerModeDualStack: + case config.BGPServerModeV4Only: + useIP6 = false + if !useIP4 { + return nil, fmt.Errorf("BGP server mode set to v4Only but no IPv4 node address configured") + } + default: + return nil, fmt.Errorf("unsupported BGP server mode %q", mode) + } + + if useIP6 { routerID = nodeIP6.String() listenAddresses = 
append(listenAddresses, routerID) } - if nodeIP4 != nil { + if useIP4 { routerID = nodeIP4.String() // Override v6 ID if v4 is available listenAddresses = append(listenAddresses, routerID) } diff --git a/config/config.go b/config/config.go index 4f113a235..7c8a3d876 100644 --- a/config/config.go +++ b/config/config.go @@ -64,14 +64,22 @@ const ( BaseVppSideHardwareAddress = "02:ca:11:c0:fd:00" ) +type BGPServerModeType string + +const ( + BGPServerModeDualStack BGPServerModeType = "dualStack" + BGPServerModeV4Only BGPServerModeType = "v4Only" +) + var ( // fake constants for place where we need a pointer to true or false True = true False = false - NodeName = RequiredStringEnvVar("NODENAME") - LogLevel = EnvVar("CALICOVPP_LOG_LEVEL", logrus.InfoLevel, logrus.ParseLevel) - BGPLogLevel = EnvVar("CALICOVPP_BGP_LOG_LEVEL", apipb.SetLogLevelRequest_INFO, BGPLogLevelParse) + NodeName = RequiredStringEnvVar("NODENAME") + LogLevel = EnvVar("CALICOVPP_LOG_LEVEL", logrus.InfoLevel, logrus.ParseLevel) + BGPLogLevel = EnvVar("CALICOVPP_BGP_LOG_LEVEL", apipb.SetLogLevelRequest_INFO, BGPLogLevelParse) + BGPServerMode = EnvVar("CALICOVPP_BGP_SERVER_MODE", BGPServerModeDualStack, BGPServerModeParse) ServiceCIDRs = PrefixListEnvVar("SERVICE_PREFIX") IPSecIkev2Psk = StringEnvVar("CALICOVPP_IPSEC_IKEV2_PSK", "") diff --git a/config/config_parse.go b/config/config_parse.go index c72ca9049..aaa0dde8f 100644 --- a/config/config_parse.go +++ b/config/config_parse.go @@ -257,3 +257,14 @@ func BGPLogLevelParse(lvl string) (apipb.SetLogLevelRequest_Level, error) { var l apipb.SetLogLevelRequest_Level return l, fmt.Errorf("not a valid logrus Level: %q", lvl) } + +func BGPServerModeParse(mode string) (BGPServerModeType, error) { + switch strings.ToLower(mode) { + case strings.ToLower(string(BGPServerModeDualStack)): + return BGPServerModeDualStack, nil + case "v4only": + return BGPServerModeV4Only, nil + } + + return BGPServerModeDualStack, fmt.Errorf("not a valid BGP server mode: %q", mode) 
+} From cebad50e939b90ac54637379f3a8271943d36b16 Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Mon, 17 Nov 2025 21:44:52 -0500 Subject: [PATCH 04/13] add NULL check before NeighborSet deletion Signed-off-by: Aritra Basu --- calico-vpp-agent/routing/bgp_watcher.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/calico-vpp-agent/routing/bgp_watcher.go b/calico-vpp-agent/routing/bgp_watcher.go index 7055f5e13..a64832042 100644 --- a/calico-vpp-agent/routing/bgp_watcher.go +++ b/calico-vpp-agent/routing/bgp_watcher.go @@ -584,9 +584,18 @@ func (s *Server) WatchBGPPath(t *tomb.Tomb) error { if err != nil { return errors.Wrapf(err, "error cleaning peer filters up") } - err = s.BGPServer.DeleteDefinedSet(context.Background(), &bgpapi.DeleteDefinedSetRequest{DefinedSet: s.bgpPeers[addr].NeighborSet, All: true}) - if err != nil { - return errors.Wrapf(err, "error deleting prefix set") + if s.bgpPeers[addr] == nil { + s.log.Warnf("Trying to delete unknown BGP peer %s", addr) + } else if s.bgpPeers[addr].NeighborSet == nil { + s.log.Warnf("Trying to delete BGP peer %s with empty NeighborSet", addr) + } else { + err = s.BGPServer.DeleteDefinedSet(context.Background(), &bgpapi.DeleteDefinedSetRequest{ + DefinedSet: s.bgpPeers[addr].NeighborSet, + All: true, + }) + if err != nil { + return errors.Wrapf(err, "error deleting prefix set") + } } err := s.BGPServer.DeletePeer( context.Background(), From 4806b3c51149a6e47efeca61fe10033da0505b96 Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Thu, 4 Dec 2025 22:07:00 -0800 Subject: [PATCH 05/13] add mFIB entry for Solicited-Node Multicast and MLDv2-routers Signed-off-by: Aritra Basu --- vpp-manager/vpp_runner.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index 41df393db..dfa845206 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -366,11 +366,14 @@ func (v *VppRunner) 
setupIPv6MulticastForHostTap(vrfID uint32, tapSwIfIndex uint // IPv6 multicast groups that need to be forwarded from the Linux host multicastGroups := []struct { addr string + prefix int // CIDR prefix length comment string }{ - {"ff02::1:2", "DHCPv6 All Relay Agents and Servers (REQUIRED for DHCPv6)"}, - {"ff02::1", "All Nodes (for NDP)"}, - {"ff02::2", "All Routers (for NDP/RA)"}, + {"ff02::1:ff00:0", 104, "Solicited-Node multicast (NDP Neighbor Solicitation targets)"}, + {"ff02::1", 128, "All Nodes / All Hosts (link-local; used by NDP and others)"}, + {"ff02::2", 128, "All Routers (routers listen here; NDP RS target)"}, + {"ff02::16", 128, "All MLDv2-capable routers"}, + {"ff02::1:2", 128, "DHCPv6 All Relay Agents and Servers"}, } for _, group := range multicastGroups { @@ -382,7 +385,7 @@ func (v *VppRunner) setupIPv6MulticastForHostTap(vrfID uint32, tapSwIfIndex uint groupNet := &net.IPNet{ IP: groupIP, - Mask: net.CIDRMask(128, 128), // /128 - specific group + Mask: net.CIDRMask(group.prefix, 128), } err := v.vpp.MRouteAddForHostMulticast(vrfID, groupNet, tapSwIfIndex, uplinkSwIfIndex) From 6f2ed4a54a5a06f3d214978e29a8702a8db98b71 Mon Sep 17 00:00:00 2001 From: Nathan Skrzypczak Date: Fri, 5 Dec 2025 19:00:27 +0100 Subject: [PATCH 06/13] Remove node address from VPP tap0 This patch removes the nodeIP from the tap0 interface in VPP. 
With this patch, for each uplink interface eth0 with IP 192.168.0.1/24 we create a corresponding tap0 set up the following way: * In VRF:0 * we create the af_packet interface with IP 192.168.0.1/24 * we receive 192.168.0.1/32 locally, traffic to 192.168.0.1 without listeners will end up in punt * In the punt table * we route 192.168.0.1/24 via tap0 192.168.0.1 * In linux * tap0 has the 192.168.0.1/24 address * tap0 will respond to ARPs as VPP has arp proxy enabled * In a host-tap-eth0-v4 VRF * we place the tap0 interface * we give it the 169.254.0.1/32 address, overridable with CALICOVPP_TAP0_ADDR * we enable IP6 without setting an address * we add a static neighbor for 192.168.0.1 to the MAC of the linux side of the tap * If we specify a rule in redirectToHostRules (e.g. for DNS in kind) * we will have the classifier entry redirect to tap0 192.168.0.1 Signed-off-by: Nathan Skrzypczak --- calico-vpp-agent/cni/cni_server.go | 11 +++- config/config.go | 27 +++++++-- config/config_parse.go | 13 ++++- vpp-manager/vpp_runner.go | 94 +++++++++++++++++------------- vpplink/helpers.go | 10 ++++ 5 files changed, 105 insertions(+), 50 deletions(-) diff --git a/calico-vpp-agent/cni/cni_server.go b/calico-vpp-agent/cni/cni_server.go index 7182b6fa8..01eaeb305 100644 --- a/calico-vpp-agent/cni/cni_server.go +++ b/calico-vpp-agent/cni/cni_server.go @@ -538,10 +538,17 @@ func (s *Server) createRedirectToHostRules() (uint32, error) { return types.InvalidID, fmt.Errorf("no main interface found") } for _, rule := range config.GetCalicoVppInitialConfig().RedirectToHostRules { + mainInterfaceAddress := mainInterface.GetAddress(vpplink.IPFamilyFromIP(rule.IP)) + if mainInterfaceAddress == nil { + return types.InvalidID, fmt.Errorf("error installing rule %v no address found on uplink", rule) + } err = s.vpp.AddSessionRedirect(&types.SessionRedirect{ - FiveTuple: types.NewDst3Tuple(rule.Proto, net.ParseIP(rule.IP), rule.Port), + FiveTuple: types.NewDst3Tuple(rule.Proto, rule.IP, 
rule.Port), TableIndex: index, - }, &types.RoutePath{Gw: config.VppHostPuntFakeGatewayAddress, SwIfIndex: mainInterface.TapSwIfIndex}) + }, &types.RoutePath{ + Gw: mainInterfaceAddress.IP, + SwIfIndex: mainInterface.TapSwIfIndex, + }) if err != nil { return types.InvalidID, err } diff --git a/config/config.go b/config/config.go index 7c8a3d876..7756c3db9 100644 --- a/config/config.go +++ b/config/config.go @@ -141,11 +141,17 @@ var ( HookScriptVppErrored, } - Info = &VppManagerInfo{} + Info = &VppManagerInfo{ + UplinkStatuses: make(map[string]UplinkStatus), + PhysicalNets: make(map[string]PhysicalNetwork), + } - // VppHostPuntFakeGatewayAddress is the fake gateway we use with a static neighbor - // in the punt table to route punted packets to the host - VppHostPuntFakeGatewayAddress = net.ParseIP("169.254.0.1") + // VppsideTap0Address is the IP address we add to the tap0 + // so that it can receive ipv4 packets + VppsideTap0Address = PrefixEnvVar( + "CALICOVPP_TAP0_ADDR", + MustParseCIDR("169.254.0.1/32"), + ) ) func RunHook(hookScript *string, hookName string, params *VppManagerParams, log *logrus.Logger) { @@ -279,7 +285,7 @@ func (u *UplinkInterfaceSpec) String() string { type RedirectToHostRulesConfigType struct { Port uint16 `json:"port,omitempty"` - IP string `json:"ip,omitempty"` + IP net.IP `json:"ip,omitempty"` /* "tcp", "udp",... */ Proto types.IPProto `json:"proto,omitempty"` } @@ -575,6 +581,17 @@ type UplinkStatus struct { // FakeNextHopIP6 is the computed next hop for v6 routes added // in linux to (ServiceCIDR, podCIDR, etc...) 
towards this interface FakeNextHopIP6 net.IP + + UplinkAddresses []*net.IPNet +} + +func (uplinkStatus *UplinkStatus) GetAddress(ipFamily vpplink.IPFamily) *net.IPNet { + for _, addr := range uplinkStatus.UplinkAddresses { + if vpplink.IPFamilyFromIPNet(addr) == ipFamily { + return addr + } + } + return nil } type PhysicalNetwork struct { diff --git a/config/config_parse.go b/config/config_parse.go index aaa0dde8f..ca3002e29 100644 --- a/config/config_parse.go +++ b/config/config_parse.go @@ -162,7 +162,18 @@ func prefixParser(value string) (net.IPNet, error) { func RequiredPrefixEnvVar(varName string) *net.IPNet { return RequiredEnvVar(varName, net.IPNet{}, prefixParser) } -func PrefixEnvVar(varName string) *net.IPNet { return EnvVar(varName, net.IPNet{}, prefixParser) } + +func PrefixEnvVar(varName string, defaultValue *net.IPNet) *net.IPNet { + return EnvVar(varName, *defaultValue, prefixParser) +} + +func MustParseCIDR(str string) *net.IPNet { + _, cidr, err := net.ParseCIDR(str) + if err != nil { + logrus.Fatalf("error parsing %s as cidr %v", str, err) + } + return cidr +} func prefixListParser(value string) ([]*net.IPNet, error) { chunks := strings.Split(value, ",") diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index dfa845206..69da49c55 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -139,22 +139,13 @@ func (v *VppRunner) configureGlobalPunt() (err error) { } func (v *VppRunner) configurePunt(tapSwIfIndex uint32, ifState config.LinuxInterfaceState) (err error) { - err = v.vpp.AddNeighbor(&types.Neighbor{ - SwIfIndex: tapSwIfIndex, - IP: config.VppHostPuntFakeGatewayAddress, - HardwareAddr: ifState.HardwareAddr, - Flags: types.IPNeighborStatic, - }) - if err != nil { - return errors.Wrapf(err, "Error adding neighbor %s to tap", config.VppHostPuntFakeGatewayAddress) - } /* In the punt table (where all punted traffics ends), route to the tap */ - for _, address := range ifState.Addresses { + for _, addr := range 
ifState.Addresses { err = v.vpp.RouteAdd(&types.Route{ - Dst: address.IPNet, Table: common.PuntTableID, + Dst: addr.IPNet, Paths: []types.RoutePath{{ - Gw: config.VppHostPuntFakeGatewayAddress, + Gw: addr.IP, SwIfIndex: tapSwIfIndex, }}, }) @@ -162,7 +153,6 @@ func (v *VppRunner) configurePunt(tapSwIfIndex uint32, ifState config.LinuxInter return errors.Wrapf(err, "error adding vpp side routes for interface") } } - return nil } @@ -446,26 +436,42 @@ func (v *VppRunner) setupTapVRF(ifSpec *config.UplinkInterfaceSpec, ifState *con return []uint32{}, errors.Wrapf(err, "error setting vpp tap in vrf %d", vrfID) } vrfs = append(vrfs, vrfID) - } - // Configure addresses to enable ipv4 & ipv6 on the tap - for _, addr := range ifState.Addresses { - if addr.IP.IsLinkLocalUnicast() && !common.IsFullyQualified(addr.IPNet) && common.IsV6Cidr(addr.IPNet) { - log.Infof("Not adding address %s to data interface (vpp requires /128 link-local)", addr.String()) - continue - } else { - log.Infof("Adding address %s to tap interface", addr.String()) - } - // to max len cidr because we don't want the rest of the subnet to be considered as - // connected to that interface - // note that the role of these addresses is just to tell vpp to accept ip4 / ip6 packets on the tap - // we use these addresses as the safest option, because as they are configured on linux, linux - // will never send us packets with these addresses as destination - err = v.vpp.AddInterfaceAddress(tapSwIfIndex, common.ToMaxLenCIDR(addr.IP)) - if err != nil { - log.Errorf("Error adding address to tap interface: %v", err) + for _, addr := range ifState.Addresses { + if vpplink.IPFamilyFromIP(addr.IP) == ipFamily { + err = v.vpp.RouteAdd(&types.Route{ + Table: vrfID, + Dst: common.FullyQualified(addr.IP), + Paths: []types.RoutePath{{ + Gw: addr.IP, + SwIfIndex: tapSwIfIndex, + }}, + }) + if err != nil { + return []uint32{}, errors.Wrapf(err, "error add route from VPP to tap0 in VRF %d", vrfID) + } + err = 
v.vpp.AddNeighbor(&types.Neighbor{ + SwIfIndex: tapSwIfIndex, + IP: addr.IP, + HardwareAddr: ifState.HardwareAddr, + Flags: types.IPNeighborStatic, + }) + if err != nil { + return []uint32{}, errors.Wrapf(err, "error add static neighbor for tap0 in VRF %d", vrfID) + } + } } } + + err = v.vpp.EnableInterfaceIP6(tapSwIfIndex) + if err != nil { + return []uint32{}, errors.Wrapf(err, "error enabling ip6 for tap %d", tapSwIfIndex) + } + + err = v.vpp.AddInterfaceAddress(tapSwIfIndex, config.VppsideTap0Address) + if err != nil { + return []uint32{}, errors.Wrapf(err, "error adding vpp side address for tap0 %d", tapSwIfIndex) + } return vrfs, nil } @@ -691,18 +697,22 @@ func (v *VppRunner) configureVppUplinkInterface( return errors.Wrap(err, "Error setting tap up") } - if config.Info.UplinkStatuses != nil { - config.Info.UplinkStatuses[link.Attrs().Name] = config.UplinkStatus{ - TapSwIfIndex: tapSwIfIndex, - SwIfIndex: ifSpec.SwIfIndex, - Mtu: uplinkMtu, - PhysicalNetworkName: ifSpec.PhysicalNetworkName, - LinkIndex: link.Attrs().Index, - Name: link.Attrs().Name, - IsMain: ifSpec.IsMain, - FakeNextHopIP4: fakeNextHopIP4, - FakeNextHopIP6: fakeNextHopIP6, - } + uplinkAddresses := make([]*net.IPNet, 0) + for _, addr := range ifState.Addresses { + uplinkAddresses = append(uplinkAddresses, addr.IPNet) + } + + config.Info.UplinkStatuses[link.Attrs().Name] = config.UplinkStatus{ + TapSwIfIndex: tapSwIfIndex, + SwIfIndex: ifSpec.SwIfIndex, + Mtu: uplinkMtu, + PhysicalNetworkName: ifSpec.PhysicalNetworkName, + LinkIndex: link.Attrs().Index, + Name: link.Attrs().Name, + IsMain: ifSpec.IsMain, + FakeNextHopIP4: fakeNextHopIP4, + FakeNextHopIP6: fakeNextHopIP6, + UplinkAddresses: uplinkAddresses, } return nil } diff --git a/vpplink/helpers.go b/vpplink/helpers.go index a7cd40641..dbf73d89e 100644 --- a/vpplink/helpers.go +++ b/vpplink/helpers.go @@ -49,6 +49,16 @@ func IPFamilyFromIPNet(ipNet *net.IPNet) IPFamily { return IPFamilyV4 } +func IPFamilyFromIP(ip net.IP) IPFamily { + if 
ip == nil { + return IPFamilyV4 + } + if ip.To4() == nil { + return IPFamilyV6 + } + return IPFamilyV4 +} + type CleanupCall struct { args []interface{} f interface{} From 4d3c4ec580a8aecd958b833e477bdd0dba781acb Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Thu, 11 Dec 2025 18:47:50 -0500 Subject: [PATCH 07/13] fix IPv6 gateway reachability with ND proxy IPv6 gateway traffic (DHCPv6/ICMP) fails when VPP takes over the uplink. - Without gateway ND proxy, host NS for the default gateway is dropped by VPP with "neighbor solicitations for unknown targets" error due to missing /128 target entry in the tap FIB. Fix: - Enable ND proxy for the gateway on the tap so the host can resolve the gateway via VPP. Signed-off-by: Aritra Basu --- vpp-manager/vpp_runner.go | 16 ++++++++++++++++ vpplink/generated/vpp_clone_current.sh | 1 + 2 files changed, 17 insertions(+) diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index 69da49c55..5dcffc7d3 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -654,6 +654,22 @@ func (v *VppRunner) configureVppUplinkInterface( } } + /* + * Add ND proxy for IPv6 gateway addresses. + * Without ND proxy for gateway, host's NS for gateway is dropped with "neighbor + * solicitations for unknown targets" error because there's no /128 FIB entry. + * This requires VPP patch https://gerrit.fd.io/r/c/vpp/+/44350 to fix NA loop bug. 
+ */ + for _, route := range ifState.Routes { + if route.Gw != nil && route.Gw.To4() == nil { + log.Infof("Adding ND proxy for IPv6 gateway %s", route.Gw) + err = v.vpp.EnableIP6NdProxy(tapSwIfIndex, route.Gw) + if err != nil { + log.Errorf("Error configuring ND proxy for gateway %s: %v", route.Gw, err) + } + } + } + if *config.GetCalicoVppDebug().GSOEnabled { err = v.vpp.EnableGSOFeature(tapSwIfIndex) if err != nil { diff --git a/vpplink/generated/vpp_clone_current.sh b/vpplink/generated/vpp_clone_current.sh index bc2f3f49f..fe6f1dc8e 100755 --- a/vpplink/generated/vpp_clone_current.sh +++ b/vpplink/generated/vpp_clone_current.sh @@ -145,6 +145,7 @@ git_cherry_pick refs/changes/07/43107/4 # 43107: vcl: fix fifo private vpp sh on git_cherry_pick refs/changes/14/43714/5 # 43714: session: fix handling of closed during migration | https://gerrit.fd.io/r/c/vpp/+/43714 git_cherry_pick refs/changes/39/43139/5 # 43139: udp: regrab connected session after transport clone | https://gerrit.fd.io/r/c/vpp/+/43139 git_cherry_pick refs/changes/23/43723/3 # 43723: session svm: fix session migrate attach data corruption | https://gerrit.fd.io/r/c/vpp/+/43723 +git_cherry_pick refs/changes/50/44350/2 # 44350: vnet: fix unicast NA handling in ND proxy | https://gerrit.fd.io/r/c/vpp/+/44350 # --------------- private plugins --------------- # Generated with 'git format-patch --zero-commit -o ./patches/ HEAD^^^' From 6326d9060955da68684bb13b3128493311b333b4 Mon Sep 17 00:00:00 2001 From: Nathan Skrzypczak Date: Mon, 19 Jan 2026 16:39:13 +0100 Subject: [PATCH 08/13] Add LinkLocal address to uplink in VPP Signed-off-by: Nathan Skrzypczak --- vpp-manager/vpp_runner.go | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index 5dcffc7d3..dd972a243 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -541,14 +541,17 @@ func (v *VppRunner) configureVppUplinkInterface( for _, addr := range 
ifState.Addresses { if addr.IP.IsLinkLocalUnicast() && !common.IsFullyQualified(addr.IPNet) && common.IsV6Cidr(addr.IPNet) { - log.Infof("Not adding address %s to uplink interface (vpp requires /128 link-local)", addr.String()) - continue + log.Infof("Adding %s instead of %s to uplink interface (vpp requires /128 link-local)", common.FullyQualified(addr.IPNet.IP).String(), addr.String()) + err = v.vpp.AddInterfaceAddress(ifSpec.SwIfIndex, common.FullyQualified(addr.IP)) + if err != nil { + log.Errorf("Error adding address to uplink interface: %v", err) + } } else { log.Infof("Adding address %s to uplink interface", addr.String()) - } - err = v.vpp.AddInterfaceAddress(ifSpec.SwIfIndex, addr.IPNet) - if err != nil { - log.Errorf("Error adding address to uplink interface: %v", err) + err = v.vpp.AddInterfaceAddress(ifSpec.SwIfIndex, addr.IPNet) + if err != nil { + log.Errorf("Error adding address to uplink interface: %v", err) + } } } for _, route := range ifState.Routes { From bc3fec6620a858bf52209217edc7f9ceec0a70f8 Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Wed, 21 Jan 2026 15:15:11 -0500 Subject: [PATCH 09/13] Add ip6tables rule for DHCPv6 hop limit configuration Configure ip6tables mangle rule to set hop limit to 2 for DHCPv6 OUTPUT traffic from client (sport 546) to server (dport 547). This prevents VPP from dropping DHCPv6 SOLICIT/REQUEST packets when it decrements hop-limit by 1 during forwarding. Since clients generate SOLICIT/REQUEST with hop-limit=1, without this rule VPP drops the packet (ip6 ttl <= 1) with ICMP time exceeded, causing DHCPv6 lease negotiation to fail. The rule is checked for existence before adding to prevent duplicates since ip6tables does not auto-dedupe rules. The rule is also cleaned up during configuration restoration. 
Signed-off-by: Aritra Basu --- vpp-manager/images/ubuntu/Dockerfile | 2 +- vpp-manager/vpp_runner.go | 48 ++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/vpp-manager/images/ubuntu/Dockerfile b/vpp-manager/images/ubuntu/Dockerfile index 6f39d183b..4a665343b 100644 --- a/vpp-manager/images/ubuntu/Dockerfile +++ b/vpp-manager/images/ubuntu/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get update \ && apt-get install -y openssl libapr1 libnuma1 \ libmbedcrypto7 libmbedtls14 libmbedx509-1 libsubunit0 \ iproute2 ifupdown ethtool libnl-3-dev libnl-route-3-dev \ - libpcap0.8 libunwind8 \ + libpcap0.8 libunwind8 iptables \ gdb \ && rm -rf /var/lib/apt/lists/* diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index dd972a243..67a82b8fd 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -888,6 +888,46 @@ func (v *VppRunner) AllocatePhysicalNetworkVRFs(phyNet string) (err error) { return nil } +func (v *VppRunner) configureDHCPv6HopLimit() { + log.Infof("Configuring ip6tables mangle OUTPUT rule for DHCPv6 hop limit on host") + + checkCmd := exec.Command("/usr/sbin/ip6tables", "-t", "mangle", "-C", "OUTPUT", + "-p", "udp", "--sport", "546", "--dport", "547", + "-j", "HL", "--hl-set", "2") + if err := checkCmd.Run(); err != nil { + outputCmd := exec.Command("/usr/sbin/ip6tables", "-t", "mangle", "-A", "OUTPUT", + "-p", "udp", "--sport", "546", "--dport", "547", + "-j", "HL", "--hl-set", "2") + outputCmd.Stdout = os.Stdout + outputCmd.Stderr = os.Stderr + if err := outputCmd.Run(); err != nil { + log.Warnf("Failed to configure ip6tables mangle OUTPUT rule for DHCPv6: %v", err) + } + } else { + log.Infof("ip6tables mangle OUTPUT rule for DHCPv6 already present") + } +} + +func (v *VppRunner) cleanupDHCPv6HopLimit() { + log.Infof("Cleaning up ip6tables mangle OUTPUT rule for DHCPv6 hop limit on host") + + checkCmd := exec.Command("/usr/sbin/ip6tables", "-t", "mangle", "-C", "OUTPUT", + "-p", "udp", "--sport", 
"546", "--dport", "547", + "-j", "HL", "--hl-set", "2") + if err := checkCmd.Run(); err == nil { + deleteCmd := exec.Command("/usr/sbin/ip6tables", "-t", "mangle", "-D", "OUTPUT", + "-p", "udp", "--sport", "546", "--dport", "547", + "-j", "HL", "--hl-set", "2") + deleteCmd.Stdout = os.Stdout + deleteCmd.Stderr = os.Stderr + if err := deleteCmd.Run(); err != nil { + log.Warnf("Failed to delete ip6tables mangle OUTPUT rule for DHCPv6: %v", err) + } + } else { + log.Infof("ip6tables mangle OUTPUT rule for DHCPv6 not present") + } +} + // Returns VPP exit code func (v *VppRunner) runVpp() (err error) { if !v.allInterfacesPhysical() { // use separate net namespace because linux deletes these interfaces when ns is deleted @@ -960,6 +1000,13 @@ func (v *VppRunner) runVpp() (err error) { return errors.Wrap(err, "Error configuring VPP") } + // FIXME This is a temporary workaround using ip6tables to set the hop limit for DHCPv6. + // Ideally, VPP should have a dedicated node for handling this. + // Without this, when forwarding a DHCPv6 SOLICIT/REQUEST packet, VPP will decrement the + // hop-limit by 1. Since client generates DHCPv6 SOLICIT/REQUEST with hop-limit=1, VPP + // drops it (ip6 ttl <= 1) with ICMP time exceeded and DHCPv6 lease negotiation fails. 
+ v.configureDHCPv6HopLimit() + // add main network that has the default VRF config.Info.PhysicalNets[config.DefaultPhysicalNetworkName] = config.PhysicalNetwork{VrfID: common.DefaultVRFIndex, PodVrfID: common.PodVRFIndex} @@ -1024,6 +1071,7 @@ func (v *VppRunner) runVpp() (err error) { func (v *VppRunner) restoreConfiguration(allInterfacesPhysical bool) { log.Infof("Restoring configuration") + v.cleanupDHCPv6HopLimit() err := utils.ClearVppManagerFiles() if err != nil { log.Errorf("Error clearing vpp manager files: %v", err) From 2a6da78f0aa3a7191cc272612389a5068c51d078 Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Wed, 21 Jan 2026 15:43:40 -0500 Subject: [PATCH 10/13] Filter out link-local routes from VPP main interface routing table Link-local addresses are not routable. When synchronizing Linux routes to VPP's uplink interface, filter out link-local addresses so that they are not added to VPP's main VRF routing table. Signed-off-by: Aritra Basu --- vpp-manager/vpp_runner.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index 67a82b8fd..ff423c750 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -555,6 +555,10 @@ func (v *VppRunner) configureVppUplinkInterface( } } for _, route := range ifState.Routes { + if route.Dst != nil && route.Dst.IP.IsLinkLocalUnicast() { + log.Infof("Skipping link-local route %s", route.Dst.String()) + continue + } err = v.vpp.RouteAdd(&types.Route{ Dst: route.Dst, Paths: []types.RoutePath{{ From b7f2b20d48c644611785abdc6785df9190749d75 Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Sat, 24 Jan 2026 04:13:24 -0500 Subject: [PATCH 11/13] Add udev ID_NET_NAME_* property restoration for VPP interfaces MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capture ID_NET_NAME_* properties before VPP driver unbind and restore them via udev rules after VPP creates host-facing tap/tun interface. 
This is needed for IAID generation by DHCPv6 client in systemd-networkd to be consistent across VPP lifecycle on the node. Key changes: - Repurpose BEFORE_IF_READ hook to capture udev properties before driver unbind - Move SetInterfaceNames() before HookBeforeIfRead so interface names are available - Store ID_NET_NAME_* values and MAC address while interface still has original driver - Create udev rules for the interface to restore ID_NET_NAME_* values after VPP runs - Cleanup udev rules on VPP shutdown - BEFORE_IF_READ → capture, VPP_RUNNING → create, VPP_DONE_OK/ERRORED → cleanup - Add EnableUdevNetNameRules config knob in CalicoVppDebugConfigType (default: true) - Allows disabling udev net name rules generation (if needed). When disabled, skips captureHostUdevProps(), createUdevNetNameRules() and removeUdevNetNameRules() Signed-off-by: Aritra Basu --- config/config.go | 9 ++- config/default_hook.sh | 114 +++++++++++++++++++++++++++++++++++++- vpp-manager/vpp_runner.go | 3 + 3 files changed, 122 insertions(+), 4 deletions(-) diff --git a/config/config.go b/config/config.go index 7756c3db9..90f34386f 100644 --- a/config/config.go +++ b/config/config.go @@ -121,7 +121,8 @@ var ( DefaultHookScript string /* Run this before getLinuxConfig() in case this is a script - * that's responsible for creating the interface */ + * that's responsible for creating the interface. + * Also captures host udev ID_NET_NAME_* properties before driver unbind. 
*/ HookScriptBeforeIfRead = StringEnvVar("CALICOVPP_HOOK_BEFORE_IF_READ", DefaultHookScript) // InitScriptTemplate /* Bash script template run just after getting config from $CALICOVPP_INTERFACE & before starting VPP */ @@ -164,7 +165,7 @@ func RunHook(hookScript *string, hookName string, params *VppManagerParams, log return } - cmd := exec.Command("/bin/bash", "-c", template, hookName) + cmd := exec.Command("/bin/bash", "-c", template, hookName, params.UplinksSpecs[0].InterfaceName) cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr err = cmd.Run() @@ -294,6 +295,7 @@ type CalicoVppDebugConfigType struct { ServicesEnabled *bool `json:"servicesEnabled,omitempty"` GSOEnabled *bool `json:"gsoEnabled,omitempty"` SpreadTxQueuesOnWorkers *bool `json:"spreadTxQueuesOnWorkers,omitempty"` + EnableUdevNetNameRules *bool `json:"enableUdevNetNameRules,omitempty"` } func (cfg *CalicoVppDebugConfigType) String() string { @@ -311,6 +313,9 @@ func (cfg *CalicoVppDebugConfigType) Validate() (err error) { if cfg.SpreadTxQueuesOnWorkers == nil { cfg.SpreadTxQueuesOnWorkers = &False } + if cfg.EnableUdevNetNameRules == nil { + cfg.EnableUdevNetNameRules = &True + } return } diff --git a/config/default_hook.sh b/config/default_hook.sh index 967e773b4..2edf6e576 100644 --- a/config/default_hook.sh +++ b/config/default_hook.sh @@ -1,7 +1,10 @@ #!/bin/sh HOOK="$0" -chroot /host /bin/sh < /dev/null 2>&1; then @@ -39,6 +42,108 @@ restart_network () { fi } +capture_udev_net_name_properties () { + echo "default_hook: Capturing udev net name properties for $INTERFACE_NAME..." 
+ + UDEV_INFO=$(udevadm info /sys/class/net/$INTERFACE_NAME 2>/dev/null) + if [ -z "$UDEV_INFO" ]; then + echo "default_hook: Failed to get udevadm info for $INTERFACE_NAME" + return + fi + + # Extract ID_NET_NAME_* properties + ID_NET_NAME_ONBOARD=$(echo "$UDEV_INFO" | grep "ID_NET_NAME_ONBOARD=" | sed 's/.*ID_NET_NAME_ONBOARD=//') + ID_NET_NAME_SLOT=$(echo "$UDEV_INFO" | grep "ID_NET_NAME_SLOT=" | sed 's/.*ID_NET_NAME_SLOT=//') + ID_NET_NAME_PATH=$(echo "$UDEV_INFO" | grep "ID_NET_NAME_PATH=" | sed 's/.*ID_NET_NAME_PATH=//') + ID_NET_NAME_MAC=$(echo "$UDEV_INFO" | grep "ID_NET_NAME_MAC=" | sed 's/.*ID_NET_NAME_MAC=//') + + # Check if we have any properties to save + if [ -z "$ID_NET_NAME_ONBOARD" ] && [ -z "$ID_NET_NAME_SLOT" ] && [ -z "$ID_NET_NAME_PATH" ] && [ -z "$ID_NET_NAME_MAC" ]; then + echo "default_hook: No udev net name properties found for $INTERFACE_NAME" + return + fi + + # Get MAC address + MAC_ADDRESS=$(cat /sys/class/net/$INTERFACE_NAME/address 2>/dev/null) + if [ -z "$MAC_ADDRESS" ]; then + echo "default_hook: Failed to get MAC address for $INTERFACE_NAME" + return + fi + + # Save properties to temp file for later use + mkdir -p /var/run/vpp + echo "MAC_ADDRESS=$MAC_ADDRESS" > /var/run/vpp/udev_props_$INTERFACE_NAME + [ -n "$ID_NET_NAME_ONBOARD" ] && echo "ID_NET_NAME_ONBOARD=$ID_NET_NAME_ONBOARD" >> /var/run/vpp/udev_props_$INTERFACE_NAME + [ -n "$ID_NET_NAME_SLOT" ] && echo "ID_NET_NAME_SLOT=$ID_NET_NAME_SLOT" >> /var/run/vpp/udev_props_$INTERFACE_NAME + [ -n "$ID_NET_NAME_PATH" ] && echo "ID_NET_NAME_PATH=$ID_NET_NAME_PATH" >> /var/run/vpp/udev_props_$INTERFACE_NAME + [ -n "$ID_NET_NAME_MAC" ] && echo "ID_NET_NAME_MAC=$ID_NET_NAME_MAC" >> /var/run/vpp/udev_props_$INTERFACE_NAME + + echo "default_hook: Captured udev properties for $INTERFACE_NAME (MAC: $MAC_ADDRESS)" + [ -n "$ID_NET_NAME_ONBOARD" ] && echo "default_hook: ID_NET_NAME_ONBOARD=$ID_NET_NAME_ONBOARD" + [ -n "$ID_NET_NAME_SLOT" ] && echo "default_hook: 
ID_NET_NAME_SLOT=$ID_NET_NAME_SLOT" + [ -n "$ID_NET_NAME_PATH" ] && echo "default_hook: ID_NET_NAME_PATH=$ID_NET_NAME_PATH" + [ -n "$ID_NET_NAME_MAC" ] && echo "default_hook: ID_NET_NAME_MAC=$ID_NET_NAME_MAC" +} + +create_udev_net_name_rule () { + PROPS_FILE="/var/run/vpp/udev_props_$INTERFACE_NAME" + if [ ! -f "$PROPS_FILE" ]; then + echo "default_hook: No udev properties captured for $INTERFACE_NAME, skipping rule creation" + return + fi + + # Source the properties file + . "$PROPS_FILE" + + if [ -z "$MAC_ADDRESS" ]; then + echo "default_hook: No MAC address captured for $INTERFACE_NAME, skipping rule creation" + return + fi + + echo "default_hook: Creating udev rule for $INTERFACE_NAME with MAC $MAC_ADDRESS..." + + # Build the udev rule + RULE_FILE="/etc/udev/rules.d/99-vpp-restore-id_net_name.rules" + echo "# Re-apply ID_NET_NAME_* properties after Calico VPP creates the host-facing tap/tun netdev." > "$RULE_FILE" + printf 'ACTION=="add", SUBSYSTEM=="net", ATTR{address}=="%s"' "$MAC_ADDRESS" >> "$RULE_FILE" + + [ -n "$ID_NET_NAME_ONBOARD" ] && printf ', ENV{ID_NET_NAME_ONBOARD}:="%s"' "$ID_NET_NAME_ONBOARD" >> "$RULE_FILE" + [ -n "$ID_NET_NAME_SLOT" ] && printf ', ENV{ID_NET_NAME_SLOT}:="%s"' "$ID_NET_NAME_SLOT" >> "$RULE_FILE" + [ -n "$ID_NET_NAME_PATH" ] && printf ', ENV{ID_NET_NAME_PATH}:="%s"' "$ID_NET_NAME_PATH" >> "$RULE_FILE" + [ -n "$ID_NET_NAME_MAC" ] && printf ', ENV{ID_NET_NAME_MAC}:="%s"' "$ID_NET_NAME_MAC" >> "$RULE_FILE" + + echo "" >> "$RULE_FILE" + + echo "default_hook: Created udev rule file at $RULE_FILE" + + # Reload udev rules + udevadm control --reload-rules + + # Trigger udev for net subsystem to apply the stored ID_NET_NAME_* properties + udevadm trigger --subsystem-match=net --action=add + echo "default_hook: Triggered udev to apply the stored ID_NET_NAME_* properties" +} + +remove_udev_net_name_rule () { + RULE_FILE="/etc/udev/rules.d/99-vpp-restore-id_net_name.rules" + PROPS_FILE="/var/run/vpp/udev_props_$INTERFACE_NAME" + + if [ -f 
"$RULE_FILE" ]; then + echo "default_hook: Removing udev rule file $RULE_FILE..." + rm -f "$RULE_FILE" + udevadm control --reload-rules + + # Trigger udev for net subsystem to remove the stored ID_NET_NAME_* properties + udevadm trigger --subsystem-match=net --action=change + echo "default_hook: Triggered udev to remove the stored ID_NET_NAME_* properties" + fi + + if [ -f "$PROPS_FILE" ]; then + rm -f "$PROPS_FILE" + fi +} + +echo "default_hook: Uplink interface name=$INTERFACE_NAME" if which systemctl > /dev/null; then echo "default_hook: using systemctl..." else @@ -46,15 +151,20 @@ else exit 1 fi -if [ "$HOOK" = "BEFORE_VPP_RUN" ]; then +if [ "$HOOK" = "BEFORE_IF_READ" ]; then + capture_udev_net_name_properties +elif [ "$HOOK" = "BEFORE_VPP_RUN" ]; then fix_dns elif [ "$HOOK" = "VPP_RUNNING" ]; then + create_udev_net_name_rule restart_network elif [ "$HOOK" = "VPP_DONE_OK" ]; then undo_dns_fix + remove_udev_net_name_rule restart_network elif [ "$HOOK" = "VPP_ERRORED" ]; then undo_dns_fix + remove_udev_net_name_rule restart_network fi diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index ff423c750..b493c4273 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -101,6 +101,9 @@ func (v *VppRunner) Run(drivers []uplink.UplinkDriver) error { return errors.Wrap(err, "Error generating VPP config") } + // Run hook to capture host udev properties before driver unbind + config.RunHook(config.HookScriptBeforeIfRead, "BEFORE_IF_READ", v.params, log) + for idx := range v.conf { err = v.uplinkDriver[idx].PreconfigureLinux() if err != nil { From 91688c2876ead58df30f55414bb807b507bfd339 Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Sat, 31 Jan 2026 14:22:43 -0500 Subject: [PATCH 12/13] ipv6: fix L3 MAC mismatch error for gateway traffic IPv6 ping between nodes fails with "l3 mac mismatch" error in VPP's ethernet-input node. Packets arriving on tap0 with destination MAC set to the infrastructure gateway's MAC are dropped. 
- IPv4 (ARP Proxy): Host sends ARP request, VPP responds with its own tap interface MAC. All subsequent IPv4 packets use VPP's MAC as the destination, passing VPP's L3 MAC filter check. - IPv6 (ND Proxy + Neighbor Advertisement): While VPP's ND proxy responds to Neighbor Solicitations with the tap interface MAC, the host also receives Neighbor Advertisement (NA) packets from the real gateway. These NA packets contain the Target Link-Layer Address Option (TLLAO) with the real gateway's MAC address. The host overwrites its neighbor cache with this information and sends IPv6 packets to the real gateway MAC instead of VPP's tap MAC. Capture the gateway's MAC address from Linux neighbor cache before VPP takes over the interface, then add it as a secondary MAC address on the tap interface using VPP's existing sw_interface_add_del_mac_address API. VPP's ethernet-input node accepts packets with either the primary MAC or any configured secondary MAC addresses, allowing traffic to flow regardless of which MAC address the host learned (from ND proxy or NA). This is a control plane only fix that requires no VPP patches. 
Signed-off-by: Aritra Basu --- config/config.go | 1 + vpp-manager/startup/interface_config.go | 5 +++++ vpp-manager/vpp_runner.go | 17 +++++++++++++++ vpplink/interfaces.go | 28 +++++++++++++++++++++++++ 4 files changed, 51 insertions(+) diff --git a/config/config.go b/config/config.go index 90f34386f..88ce64426 100644 --- a/config/config.go +++ b/config/config.go @@ -673,6 +673,7 @@ type LinuxInterfaceState struct { IsUp bool Addresses []netlink.Addr Routes []netlink.Route + Neighbors []netlink.Neigh HardwareAddr net.HardwareAddr PromiscOn bool NumTxQueues int diff --git a/vpp-manager/startup/interface_config.go b/vpp-manager/startup/interface_config.go index b5c857ce7..ea4957073 100644 --- a/vpp-manager/startup/interface_config.go +++ b/vpp-manager/startup/interface_config.go @@ -99,6 +99,11 @@ func loadInterfaceConfigFromLinux(ifSpec config.UplinkInterfaceSpec) (*config.Li return nil, errors.Wrapf(err, "cannot list %s routes", ifSpec.InterfaceName) } conf.SortRoutes() + + conf.Neighbors, err = netlink.NeighList(link.Attrs().Index, netlink.FAMILY_ALL) + if err != nil { + log.Warnf("cannot list %s neighbors: %v", ifSpec.InterfaceName, err) + } } conf.HardwareAddr = link.Attrs().HardwareAddr conf.NodeIP4 = getNodeAddress(&conf, false /* isV6 */) diff --git a/vpp-manager/vpp_runner.go b/vpp-manager/vpp_runner.go index b493c4273..ac3b679fd 100644 --- a/vpp-manager/vpp_runner.go +++ b/vpp-manager/vpp_runner.go @@ -677,6 +677,23 @@ func (v *VppRunner) configureVppUplinkInterface( if err != nil { log.Errorf("Error configuring ND proxy for gateway %s: %v", route.Gw, err) } + /* + * Add gateway MAC as secondary address on tap0 to fix L3 MAC mismatch. + * Unlike IPv4 where ARP proxy makes host learn VPP's MAC, in IPv6 the host may learn + * the infrastructure gateway MAC from Neighbor Advertisement (NA). Packets sent with + * this MAC would be dropped by VPP's ethernet-input with "l3 mac mismatch" error. 
+ * Adding the gateway MAC as secondary address allows VPP to accept these packets. + */ + for _, neigh := range ifState.Neighbors { + if neigh.IP.Equal(route.Gw) && len(neigh.HardwareAddr) > 0 { + log.Infof("Adding gateway MAC %s as secondary address on tap0 for IPv6 L3 MAC acceptance", neigh.HardwareAddr) + err = v.vpp.AddInterfaceMacAddress(tapSwIfIndex, neigh.HardwareAddr) + if err != nil { + log.Errorf("Error adding gateway MAC as secondary address: %v", err) + } + break + } + } } } diff --git a/vpplink/interfaces.go b/vpplink/interfaces.go index 2cba3a0f0..bc29b7e3f 100644 --- a/vpplink/interfaces.go +++ b/vpplink/interfaces.go @@ -87,6 +87,34 @@ func (v *VppLink) SetInterfaceMacAddress(swIfIndex uint32, mac net.HardwareAddr) return nil } +func (v *VppLink) AddInterfaceMacAddress(swIfIndex uint32, mac net.HardwareAddr) error { + client := interfaces.NewServiceClient(v.GetConnection()) + + _, err := client.SwInterfaceAddDelMacAddress(v.GetContext(), &interfaces.SwInterfaceAddDelMacAddress{ + SwIfIndex: swIfIndex, + Addr: types.MacAddress(mac), + IsAdd: 1, + }) + if err != nil { + return fmt.Errorf("failed to add secondary MAC address: %w", err) + } + return nil +} + +func (v *VppLink) DelInterfaceMacAddress(swIfIndex uint32, mac net.HardwareAddr) error { + client := interfaces.NewServiceClient(v.GetConnection()) + + _, err := client.SwInterfaceAddDelMacAddress(v.GetContext(), &interfaces.SwInterfaceAddDelMacAddress{ + SwIfIndex: swIfIndex, + Addr: types.MacAddress(mac), + IsAdd: 0, + }) + if err != nil { + return fmt.Errorf("failed to delete secondary MAC address: %w", err) + } + return nil +} + func (v *VppLink) SetInterfaceVRF(swIfIndex, vrfIndex uint32, isIP6 bool) error { client := interfaces.NewServiceClient(v.GetConnection()) From 7950c9b59731020fdbb6a4e4e456b848a2c76fae Mon Sep 17 00:00:00 2001 From: Aritra Basu Date: Fri, 30 Jan 2026 01:04:46 -0500 Subject: [PATCH 13/13] routing: gracefully handle missing node IP during BGP announcements In dual-stack 
or IPv6-enabled clusters, the agent can crash when it attempts to announce or withdraw a BGP path for an IPv6 address, but the node does not have a corresponding IPv6 address configured in HostMetadata. Before this change, common.MakePath() returned a generic error ('no ip6 address for node'). That error was wrapped by the routing_server and propagated back to tomb, causing the routing watcher to stop and the main process to tear down (ending in a fatal gRPC server error). Changes: - Added sentinel errors ErrNoNodeIPv4 and ErrNoNodeIPv6 in common.go - Added helper function IsMissingNodeIP() to detect these specific errors - Updated MakePath() to return sentinel errors (including for SRv6 next-hop) - Updated routing_server and prefix_watcher to treat missing-node-IP as a non-fatal condition: log a warning indicating we skip announce/withdraw, returning nil so tomb does not enter Dying state This prevents the agent from crashing and instead emits a clear warning log for operators. Signed-off-by: Aritra Basu --- calico-vpp-agent/common/common.go | 20 ++++++++++++++++++-- calico-vpp-agent/routing/routing_server.go | 10 ++++++++++ calico-vpp-agent/watchers/prefix_watcher.go | 8 ++++++++ 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/calico-vpp-agent/common/common.go b/calico-vpp-agent/common/common.go index c8fcdfbec..009c9930d 100644 --- a/calico-vpp-agent/common/common.go +++ b/calico-vpp-agent/common/common.go @@ -181,6 +181,19 @@ func FullyQualified(addr net.IP) *net.IPNet { } } +var ( + ErrNoNodeIPv4 = errors.New("no ip4 address for node") + ErrNoNodeIPv6 = errors.New("no ip6 address for node") +) + +func IsMissingNodeIP(err error) bool { + if err == nil { + return false + } + cause := errors.Cause(err) + return cause == ErrNoNodeIPv4 || cause == ErrNoNodeIPv6 +} + const ( aggregatedPrefixSetBaseName = "aggregated" hostPrefixSetBaseName = "host" @@ -258,7 +271,7 @@ func MakePath(prefix string, isWithdrawal bool, nodeIPv4 *net.IP, nodeIPv6 *net. 
if ipNet.IP.To4() != nil { if nodeIPv4 == nil { - return nil, fmt.Errorf("no ip4 address for node") + return nil, ErrNoNodeIPv4 } family = &BgpFamilyUnicastIPv4 if vni != 0 { @@ -267,6 +280,9 @@ func MakePath(prefix string, isWithdrawal bool, nodeIPv4 *net.IP, nodeIPv6 *net. var nhAttr *apb.Any if *config.GetCalicoVppFeatureGates().SRv6Enabled { + if nodeIPv6 == nil { + return nil, ErrNoNodeIPv6 + } nhAttr, err = apb.New(&bgpapi.NextHopAttribute{ NextHop: nodeIPv6.String(), }) @@ -281,7 +297,7 @@ func MakePath(prefix string, isWithdrawal bool, nodeIPv4 *net.IP, nodeIPv6 *net. attrs = append(attrs, nhAttr) } else { if nodeIPv6 == nil { - return nil, fmt.Errorf("no ip6 address for node") + return nil, ErrNoNodeIPv6 } family = &BgpFamilyUnicastIPv6 if vni != 0 { diff --git a/calico-vpp-agent/routing/routing_server.go b/calico-vpp-agent/routing/routing_server.go index 51b4fc0c6..0c0fd8382 100644 --- a/calico-vpp-agent/routing/routing_server.go +++ b/calico-vpp-agent/routing/routing_server.go @@ -258,6 +258,11 @@ func (s *Server) announceLocalAddress(addr *net.IPNet, vni uint32) error { nodeIP4, nodeIP6 := common.GetBGPSpecAddresses(s.nodeBGPSpec) path, err := common.MakePath(addr.String(), false /* isWithdrawal */, nodeIP4, nodeIP6, vni, uint32(*s.BGPConf.ASNumber)) if err != nil { + if common.IsMissingNodeIP(err) { + s.log.WithError(err).Warnf("Skipping BGP announce for %s: node IP missing", addr.String()) + s.localAddressMap[addr.String()] = localAddress{ipNet: addr, vni: vni} + return nil + } return errors.Wrap(err, "error making path to announce") } s.localAddressMap[addr.String()] = localAddress{ipNet: addr, vni: vni} @@ -273,6 +278,11 @@ func (s *Server) withdrawLocalAddress(addr *net.IPNet, vni uint32) error { nodeIP4, nodeIP6 := common.GetBGPSpecAddresses(s.nodeBGPSpec) path, err := common.MakePath(addr.String(), true /* isWithdrawal */, nodeIP4, nodeIP6, vni, uint32(*s.BGPConf.ASNumber)) if err != nil { + if common.IsMissingNodeIP(err) { + 
s.log.WithError(err).Warnf("Skipping BGP withdraw for %s: node IP missing", addr.String()) + delete(s.localAddressMap, addr.String()) + return nil + } return errors.Wrap(err, "error making path to withdraw") } delete(s.localAddressMap, addr.String()) diff --git a/calico-vpp-agent/watchers/prefix_watcher.go b/calico-vpp-agent/watchers/prefix_watcher.go index 1fa72b34c..97211e8f8 100644 --- a/calico-vpp-agent/watchers/prefix_watcher.go +++ b/calico-vpp-agent/watchers/prefix_watcher.go @@ -74,6 +74,10 @@ func (w *PrefixWatcher) WatchPrefix(t *tomb.Tomb) error { ip4, ip6 := common.GetBGPSpecAddresses(w.nodeBGPSpec) path, err := common.MakePath(prefix, false /* isWithdrawal */, ip4, ip6, 0, 0) if err != nil { + if common.IsMissingNodeIP(err) { + w.log.WithError(err).Warnf("Skipping prefix announcement for %s: node IP missing", prefix) + continue + } return errors.Wrap(err, "error making new path for assigned prefix") } toAdd = append(toAdd, path) @@ -90,6 +94,10 @@ func (w *PrefixWatcher) WatchPrefix(t *tomb.Tomb) error { ip4, ip6 := common.GetBGPSpecAddresses(w.nodeBGPSpec) path, err := common.MakePath(p, true /* isWithdrawal */, ip4, ip6, 0, 0) if err != nil { + if common.IsMissingNodeIP(err) { + w.log.WithError(err).Warnf("Skipping prefix withdrawal for %s: node IP missing", p) + continue + } return errors.Wrap(err, "error making new path for removed prefix") } toRemove = append(toRemove, path)