From 302949c08a84308f3a264becaba624f54a625739 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 23 Jun 2025 14:06:41 +0200 Subject: [PATCH 01/15] ixgbevf: remove legacy Rx Similarly as in commit 53844673d555 ("iavf: kill "legacy-rx" for good"), drop skb construction logic in favor of only using napi_build_skb() as a superior option that reduces the need to allocate and copy memory. When compared to iavf changes, ixgbevf has a single complication: MAC type 82599 cannot finely limit the DMA write size with RXDCTL.RLPML, only 1024 increments through SRRCTL are available, see commit fe68195daf34 ("ixgbevf: Require large buffers for build_skb on 82599VF") and commit 2bafa8fac19a ("ixgbe: don't set RXDCTL.RLPML for 82599"). Therefore, this is a special case requiring legacy RX unless large buffers are used. For now, solve this by always using large buffers for this MAC type. Suggested-by: Alexander Lobakin Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ethtool.c | 48 -------- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 13 +-- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 106 ++---------------- 3 files changed, 13 insertions(+), 154 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 7ac53171b0410c..a8404cdff27725 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -72,13 +72,6 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) -static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = { -#define IXGBEVF_PRIV_FLAGS_LEGACY_RX BIT(0) - "legacy-rx", -}; - -#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings) - static int ixgbevf_get_link_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { @@ -216,8 +209,6 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev, strscpy(drvinfo->driver, ixgbevf_driver_name, sizeof(drvinfo->driver)); strscpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info)); - - drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN; } static void ixgbevf_get_ringparam(struct net_device *netdev, @@ -409,8 +400,6 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset) return IXGBEVF_TEST_LEN; case ETH_SS_STATS: return IXGBEVF_STATS_LEN; - case ETH_SS_PRIV_FLAGS: - return IXGBEVF_PRIV_FLAGS_STR_LEN; default: return -EINVAL; } @@ -538,10 +527,6 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset, p += ETH_GSTRING_LEN; } break; - case ETH_SS_PRIV_FLAGS: - memcpy(data, ixgbevf_priv_flags_strings, - IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN); - break; } } @@ -939,37 +924,6 @@ static int ixgbevf_get_rxfh(struct net_device *netdev, return err; } -static u32 ixgbevf_get_priv_flags(struct net_device *netdev) -{ - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - u32 priv_flags = 0; - - if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) - priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX; - - return priv_flags; -} - -static int ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags) -{ - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - unsigned int flags = adapter->flags; - - flags &= ~IXGBEVF_FLAGS_LEGACY_RX; - if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX) - flags |= IXGBEVF_FLAGS_LEGACY_RX; - - if (flags != adapter->flags) { - adapter->flags = flags; - - /* reset interface to repopulate queues */ - if 
(netif_running(netdev)) - ixgbevf_reinit_locked(adapter); - } - - return 0; -} - static const struct ethtool_ops ixgbevf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = ixgbevf_get_drvinfo, @@ -992,8 +946,6 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_rxfh_key_size = ixgbevf_get_rxfh_key_size, .get_rxfh = ixgbevf_get_rxfh, .get_link_ksettings = ixgbevf_get_link_ksettings, - .get_priv_flags = ixgbevf_get_priv_flags, - .set_priv_flags = ixgbevf_set_priv_flags, }; void ixgbevf_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 3a379e6a3a2ab2..1cb20f6582566a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -73,7 +73,6 @@ struct ixgbevf_rx_queue_stats { enum ixgbevf_ring_state_t { __IXGBEVF_RX_3K_BUFFER, - __IXGBEVF_RX_BUILD_SKB_ENABLED, __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, __IXGBEVF_TX_XDP_RING, @@ -176,21 +175,13 @@ struct ixgbevf_ring { #define clear_ring_uses_large_buffer(ring) \ clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define ring_uses_build_skb(ring) \ - test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) -#define set_ring_build_skb_enabled(ring) \ - set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) -#define clear_ring_build_skb_enabled(ring) \ - clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) - static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring) { #if (PAGE_SIZE < 8192) if (ring_uses_large_buffer(ring)) return IXGBEVF_RXBUFFER_3072; - if (ring_uses_build_skb(ring)) - return IXGBEVF_MAX_FRAME_BUILD_SKB; + return IXGBEVF_MAX_FRAME_BUILD_SKB; #endif return IXGBEVF_RXBUFFER_2048; } @@ -386,8 +377,6 @@ struct ixgbevf_adapter { u32 flags; bool link_state; -#define IXGBEVF_FLAGS_LEGACY_RX BIT(1) - #ifdef CONFIG_XFRM struct ixgbevf_ipsec *ipsec; #endif /* CONFIG_XFRM */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 535d0f71f52149..2c6befd93778fc 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -602,7 +602,7 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring) { - return ring_uses_build_skb(rx_ring) ? IXGBEVF_SKB_PAD : 0; + return IXGBEVF_SKB_PAD; } static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, @@ -832,9 +832,7 @@ static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, #if (PAGE_SIZE < 8192) unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; #else - unsigned int truesize = ring_uses_build_skb(rx_ring) ? 
- SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) : - SKB_DATA_ALIGN(size); + unsigned int truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); #endif skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); @@ -845,74 +843,6 @@ static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, #endif } -static -struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) -{ - unsigned int size = xdp->data_end - xdp->data; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); -#endif - unsigned int headlen; - struct sk_buff *skb; - - /* prefetch first cache line of first page */ - net_prefetch(xdp->data); - - /* Note, we get here by enabling legacy-rx via: - * - * ethtool --set-priv-flags legacy-rx on - * - * In this mode, we currently get 0 extra XDP headroom as - * opposed to having legacy-rx off, where we process XDP - * packets going to stack via ixgbevf_build_skb(). - * - * For ixgbevf_construct_skb() mode it means that the - * xdp->data_meta will always point to xdp->data, since - * the helper cannot expand the head. Should this ever - * changed in future for legacy-rx mode on, then lets also - * add xdp->data_meta handling here. - */ - - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE); - if (unlikely(!skb)) - return NULL; - - /* Determine available headroom for copy */ - headlen = size; - if (headlen > IXGBEVF_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, xdp->data, - IXGBEVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), xdp->data, - ALIGN(headlen, sizeof(long))); - - /* update all of the pointers */ - size -= headlen; - if (size) { - skb_add_rx_frag(skb, 0, rx_buffer->page, - (xdp->data + headlen) - - page_address(rx_buffer->page), - size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - rx_buffer->pagecnt_bias++; - } - - return skb; -} - static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, u32 qmask) { @@ -1092,10 +1022,8 @@ static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring, #if (PAGE_SIZE < 8192) truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ #else - truesize = ring_uses_build_skb(rx_ring) ? 
- SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); + truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); #endif return truesize; } @@ -1182,12 +1110,9 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, total_rx_bytes += size; } else if (skb) { ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); - } else if (ring_uses_build_skb(rx_ring)) { + } else { skb = ixgbevf_build_skb(rx_ring, rx_buffer, &xdp, rx_desc); - } else { - skb = ixgbevf_construct_skb(rx_ring, rx_buffer, - &xdp, rx_desc); } /* exit if we failed to retrieve a buffer */ @@ -1958,8 +1883,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, #if (PAGE_SIZE < 8192) /* Limit the maximum frame size so we don't overrun the skb */ - if (ring_uses_build_skb(ring) && - !ring_uses_large_buffer(ring)) + if (!ring_uses_large_buffer(ring)) rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB | IXGBE_RXDCTL_RLPML_EN; #endif @@ -1978,22 +1902,16 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, struct net_device *netdev = adapter->netdev; unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - /* set build_skb and buffer size flags */ - clear_ring_build_skb_enabled(rx_ring); + /* set buffer size flags */ clear_ring_uses_large_buffer(rx_ring); - if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) - return; - if (PAGE_SIZE < 8192) - if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB) + /* 82599 can't rely on RXDCTL.RLPML to restrict + * the size of the frame + */ + if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB || + adapter->hw.mac.type == ixgbe_mac_82599_vf) set_ring_uses_large_buffer(rx_ring); - - /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */ - if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring)) - return; - - set_ring_build_skb_enabled(rx_ring); } /** From c492fe0f823329db0c121572ca600cb3a5f45b33 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 24 Jun 2025 12:49:22 +0200 Subject: [PATCH 02/15] ixgbevf: do not share pages between packets Again, as in the related iavf commit 920d86f3c552 ("iavf: drop page splitting and recycling"), as an intermediate step, drop the page sharing and recycling logic in preparation for offloading it to page_pool. Instead of the previous sharing and recycling, just allocate a new page every time.
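
For reference, a condensed sketch of the resulting allocation path, taken from the ixgbevf_alloc_mapped_page() hunk further down (error paths kept; the success-path stats bump and the old page-reuse shortcut are omitted):

	/* every buffer now starts life as a fresh order-0 page */
	page = dev_alloc_pages(0);
	if (unlikely(!page)) {
		rx_ring->rx_stats.alloc_rx_page_failed++;
		return false;
	}

	/* map the whole page; payload starts at the fixed IXGBEVF_SKB_PAD offset */
	dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE,
				 DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR);
	if (dma_mapping_error(rx_ring->dev, dma)) {
		__free_page(page);
		rx_ring->rx_stats.alloc_rx_page_failed++;
		return false;
	}

	bi->dma = dma;
	bi->page = page;
	bi->page_offset = IXGBEVF_SKB_PAD;

The release side shrinks accordingly: ixgbevf_put_rx_buffer() becomes a plain dma_unmap_page_attrs() plus clearing rx_buffer->page, with no reuse decision and no pagecnt_bias bookkeeping.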
Suggested-by: Alexander Lobakin Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 35 --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 234 ++---------------- 2 files changed, 23 insertions(+), 246 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 1cb20f6582566a..b11fd740da3e08 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -45,12 +45,7 @@ struct ixgbevf_tx_buffer { struct ixgbevf_rx_buffer { dma_addr_t dma; struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; }; struct ixgbevf_stats { @@ -144,7 +139,6 @@ struct ixgbevf_ring { /* Supported Rx Buffer Sizes */ #define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */ -#define IXGBEVF_RXBUFFER_2048 2048 #define IXGBEVF_RXBUFFER_3072 3072 #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256 @@ -168,35 +162,6 @@ struct ixgbevf_ring { #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 -#define ring_uses_large_buffer(ring) \ - test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define set_ring_uses_large_buffer(ring) \ - set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define clear_ring_uses_large_buffer(ring) \ - clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) - -static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring_uses_large_buffer(ring)) - return IXGBEVF_RXBUFFER_3072; - - return IXGBEVF_MAX_FRAME_BUILD_SKB; -#endif - return IXGBEVF_RXBUFFER_2048; -} - -static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring_uses_large_buffer(ring)) - return 1; -#endif - return 0; -} - -#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring)) - #define check_for_tx_hang(ring) \ test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) #define set_check_for_tx_hang(ring) \ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 2c6befd93778fc..1ca1abbdeaec87 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -112,9 +112,6 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter); static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector); static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter); -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer); -static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *old_buff); static void ixgbevf_remove_adapter(struct ixgbe_hw *hw) { @@ -544,32 +541,14 @@ struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, size, DMA_FROM_DEVICE); - rx_buffer->pagecnt_bias--; - return rx_buffer; } static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct sk_buff *skb) + struct ixgbevf_rx_buffer *rx_buffer) { - if (ixgbevf_can_reuse_rx_page(rx_buffer)) { - /* hand second half of page back to the ring */ - ixgbevf_reuse_rx_page(rx_ring, rx_buffer); - } else { - if (IS_ERR(skb)) - /* We are not reusing the buffer so unmap it and free - * any references we are holding to it - */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - 
ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } - - /* clear contents of rx_buffer */ + dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, + DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); rx_buffer->page = NULL; } @@ -600,38 +579,28 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, return true; } -static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring) -{ - return IXGBEVF_SKB_PAD; -} - static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, struct ixgbevf_rx_buffer *bi) { struct page *page = bi->page; dma_addr_t dma; - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - /* alloc new page for storage */ - page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring)); + page = dev_alloc_pages(0); if (unlikely(!page)) { rx_ring->rx_stats.alloc_rx_page_failed++; return false; } /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - ixgbevf_rx_pg_size(rx_ring), + dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); /* if mapping failed free memory back to system since * there isn't much point in holding memory we can't use */ if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, ixgbevf_rx_pg_order(rx_ring)); + __free_page(page); rx_ring->rx_stats.alloc_rx_page_failed++; return false; @@ -639,8 +608,7 @@ static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, bi->dma = dma; bi->page = page; - bi->page_offset = ixgbevf_rx_offset(rx_ring); - bi->pagecnt_bias = 1; + bi->page_offset = IXGBEVF_SKB_PAD; rx_ring->rx_stats.alloc_rx_page++; return true; @@ -673,7 +641,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, bi->dma, bi->page_offset, - ixgbevf_rx_bufsz(rx_ring), + IXGBEVF_RXBUFFER_3072, DMA_FROM_DEVICE); /* Refresh the desc even if pkt_addr didn't change @@ -755,66 +723,6 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, return false; } -/** - * ixgbevf_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *old_buff) -{ - struct ixgbevf_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_buffer_info[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? 
nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->page = old_buff->page; - new_buff->dma = old_buff->dma; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* avoid re-using remote and pfmemalloc pages */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) - return false; -#else -#define IXGBEVF_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048) - - if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET) - return false; - -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - /** * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on @@ -829,18 +737,10 @@ static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, struct sk_buff *skb, unsigned int size) { -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else unsigned int truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); -#endif + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, rx_buffer->page_offset, size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif } static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, @@ -857,13 +757,9 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc) { unsigned int metasize = xdp->data - xdp->data_meta; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + SKB_DATA_ALIGN(xdp->data_end - xdp->data_hard_start); -#endif struct sk_buff *skb; /* Prefetch first cache line of first page. 
If xdp->data_meta @@ -884,13 +780,6 @@ static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, if (metasize) skb_metadata_set(skb, metasize); - /* update buffer offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - return skb; } @@ -1014,38 +903,11 @@ static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, return result; } -static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring, - unsigned int size) -{ - unsigned int truesize; - -#if (PAGE_SIZE < 8192) - truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); -#endif - return truesize; -} - -static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - unsigned int size) -{ - unsigned int truesize = ixgbevf_rx_frame_truesize(rx_ring, size); - -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} - static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); struct sk_buff *skb = rx_ring->skb; @@ -1054,10 +916,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, int xdp_res = 0; /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0); -#endif - xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); + xdp_init_buff(&xdp, IXGBEVF_RXBUFFER_3072, &rx_ring->xdp_rxq); while (likely(total_rx_packets < budget)) { struct ixgbevf_rx_buffer *rx_buffer; @@ -1081,31 +940,24 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, */ rmb(); - rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size); + rx_buffer = + ixgbevf_get_rx_buffer(rx_ring, IXGBEVF_RXBUFFER_3072); /* retrieve a buffer from the ring */ if (!skb) { - unsigned int offset = ixgbevf_rx_offset(rx_ring); + unsigned int offset = rx_buffer->page_offset; unsigned char *hard_start; hard_start = page_address(rx_buffer->page) + rx_buffer->page_offset - offset; xdp_prepare_buff(&xdp, hard_start, offset, size, true); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size); -#endif xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp); } if (xdp_res) { - if (xdp_res == IXGBEVF_XDP_TX) { + if (xdp_res == IXGBEVF_XDP_TX) xdp_xmit = true; - ixgbevf_rx_buffer_flip(rx_ring, rx_buffer, - size); - } else { - rx_buffer->pagecnt_bias++; - } + total_rx_packets++; total_rx_bytes += size; } else if (skb) { @@ -1118,11 +970,10 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* exit if we failed to retrieve a buffer */ if (!xdp_res && !skb) { rx_ring->rx_stats.alloc_rx_buff_failed++; - rx_buffer->pagecnt_bias++; break; } - ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb); + ixgbevf_put_rx_buffer(rx_ring, rx_buffer); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -1699,10 +1550,7 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, srrctl = IXGBE_SRRCTL_DROP_EN; srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - if (ring_uses_large_buffer(ring)) - srrctl 
|= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); @@ -1880,13 +1728,6 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | IXGBE_RXDCTL_RLPML_EN); - -#if (PAGE_SIZE < 8192) - /* Limit the maximum frame size so we don't overrun the skb */ - if (!ring_uses_large_buffer(ring)) - rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB | - IXGBE_RXDCTL_RLPML_EN; -#endif } rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; @@ -1896,24 +1737,6 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring)); } -static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *rx_ring) -{ - struct net_device *netdev = adapter->netdev; - unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - - /* set buffer size flags */ - clear_ring_uses_large_buffer(rx_ring); - - if (PAGE_SIZE < 8192) - /* 82599 can't rely on RXDCTL.RLPML to restrict - * the size of the frame - */ - if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB || - adapter->hw.mac.type == ixgbe_mac_82599_vf) - set_ring_uses_large_buffer(rx_ring); -} - /** * ixgbevf_configure_rx - Configure 82599 VF Receive Unit after Reset * @adapter: board private structure @@ -1944,7 +1767,6 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbevf_ring *rx_ring = adapter->rx_ring[i]; - ixgbevf_set_rx_buffer_len(adapter, rx_ring); ixgbevf_configure_rx_ring(adapter, rx_ring); } } @@ -2295,19 +2117,12 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) dma_sync_single_range_for_cpu(rx_ring->dev, rx_buffer->dma, rx_buffer->page_offset, - ixgbevf_rx_bufsz(rx_ring), + IXGBEVF_RXBUFFER_3072, DMA_FROM_DEVICE); /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, - rx_buffer->dma, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - + ixgbevf_put_rx_buffer(rx_ring, rx_buffer); + __free_page(rx_buffer->page); i++; if (i == rx_ring->count) i = 0; @@ -4363,12 +4178,9 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) struct bpf_prog *old_prog; /* verify ixgbevf ring attributes are sufficient for XDP */ - for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbevf_ring *ring = adapter->rx_ring[i]; - - if (frame_size > ixgbevf_rx_bufsz(ring)) + for (i = 0; i < adapter->num_rx_queues; i++) + if (frame_size > IXGBEVF_RXBUFFER_3072) return -EINVAL; - } old_prog = xchg(&adapter->xdp_prog, prog); From 360974dff01bef2fa1edfee11029dfbea7460197 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 9 Jul 2025 09:28:02 +0200 Subject: [PATCH 03/15] ixgbevf: use libeth in Rx processing Use page_pool buffers by the means of libeth in the Rx queues, this significantly reduces code complexity of the driver itself. 
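
As a rough sketch of what Rx resource setup now boils down to, condensed from the ixgbevf_setup_rx_resources() hunk below (descriptor ring allocation, XDP rxq registration and error unwinding omitted), the driver only describes the fill queue and lets libeth create the page_pool behind it:

	struct libeth_fq fq = {
		.count	 = rx_ring->count,
		.nid	 = NUMA_NO_NODE,
		.type	 = LIBETH_FQE_MTU,
		.xdp	 = !!rx_ring->xdp_prog,
		.buf_len = IXGBEVF_RX_PAGE_LEN(rx_ring->xdp_prog ?
					       LIBETH_XDP_HEADROOM :
					       LIBETH_SKB_HEADROOM),
	};

	/* creates the page_pool and the rx_fqes array in one go */
	ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi);
	if (ret)
		return ret;

	rx_ring->pp = fq.pp;
	rx_ring->rx_fqes = fq.fqes;
	rx_ring->truesize = fq.truesize;
	rx_ring->rx_buf_len = fq.buf_len;

On the hot path, refilling a descriptor then reduces to libeth_rx_alloc(), which hands back a ready-to-use DMA address, and buffers are returned through the page_pool instead of the manual dma_unmap_page_attrs()/__free_page() pair.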
Suggested-by: Alexander Lobakin Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/Kconfig | 1 + drivers/net/ethernet/intel/ixgbevf/defines.h | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 25 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 287 ++++++------------ 4 files changed, 102 insertions(+), 213 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index b05cc0d7a15dcc..885632e305f3da 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -202,6 +202,7 @@ config IXGBE_IPSEC config IXGBEVF tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support" depends on PCI_MSI + select LIBETH help This driver supports Intel(R) PCI Express virtual functions for the Intel(R) ixgbe driver. For more information on how to identify your diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index a9bc96f6399dc0..4a656f72db0d78 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -70,7 +70,7 @@ typedef u32 ixgbe_link_speed; #define IXGBE_PSRTYPE_L2HDR 0x00001000 /* SRRCTL bit definitions */ -#define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */ +#define IXGBE_SRRCTL_BSIZEPKT_STEP 1024 /* specified in KB */ #define IXGBE_SRRCTL_RDMTS_SHIFT 22 #define IXGBE_SRRCTL_RDMTS_MASK 0x01C00000 #define IXGBE_SRRCTL_DROP_EN 0x10000000 diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index b11fd740da3e08..37340f067feb55 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -42,12 +42,6 @@ struct ixgbevf_tx_buffer { u32 tx_flags; }; -struct ixgbevf_rx_buffer { - dma_addr_t dma; - struct page *page; - __u32 page_offset; -}; - struct ixgbevf_stats { u64 packets; u64 bytes; @@ -86,18 +80,21 @@ struct ixgbevf_ring { struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */ struct net_device *netdev; struct bpf_prog *xdp_prog; - struct device *dev; + union { + struct page_pool *pp; /* Rx ring */ + struct device *dev; /* Tx ring */ + }; void *desc; /* descriptor ring memory */ dma_addr_t dma; /* phys. address of descriptor ring */ unsigned int size; /* length in bytes */ + u32 truesize; /* Rx buffer full size */ u16 count; /* amount of descriptors */ u16 next_to_use; u16 next_to_clean; - u16 next_to_alloc; union { struct ixgbevf_tx_buffer *tx_buffer_info; - struct ixgbevf_rx_buffer *rx_buffer_info; + struct libeth_fqe *rx_fqes; }; unsigned long state; struct ixgbevf_stats stats; @@ -116,6 +113,7 @@ struct ixgbevf_ring { */ u16 reg_idx; int queue_index; /* needed for multiqueue queue management */ + u32 rx_buf_len; } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ @@ -145,13 +143,8 @@ struct ixgbevf_ring { #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) -#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#if (PAGE_SIZE < 8192) -#define IXGBEVF_MAX_FRAME_BUILD_SKB \ - (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD) -#else -#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048 -#endif +#define IXGBEVF_RX_PAGE_LEN(hr) (ALIGN_DOWN(LIBETH_RX_PAGE_LEN(hr), \ + IXGBE_SRRCTL_BSIZEPKT_STEP)) #define IXGBE_TX_FLAGS_CSUM BIT(0) #define IXGBE_TX_FLAGS_VLAN BIT(1) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 1ca1abbdeaec87..3e07c8cb7b880f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "ixgbevf.h" @@ -82,6 +83,7 @@ static const struct pci_device_id ixgbevf_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl); MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver"); +MODULE_IMPORT_NS("LIBETH"); MODULE_LICENSE("GPL v2"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) @@ -304,7 +306,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, /* free the skb */ if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); + libeth_xdp_return_va(tx_buffer->data, true); else napi_consume_skb(tx_buffer->skb, napi_budget); @@ -521,35 +523,6 @@ static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); - - skb->protocol = eth_type_trans(skb, rx_ring->netdev); -} - -static -struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, - const unsigned int size) -{ - struct ixgbevf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - return rx_buffer; -} - -static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer) -{ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE, - DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); - rx_buffer->page = NULL; } /** @@ -579,41 +552,6 @@ static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, return true; } -static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* alloc new page for storage */ - page = dev_alloc_pages(0); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, PAGE_SIZE, - DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_page(page); - - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = IXGBEVF_SKB_PAD; - rx_ring->rx_stats.alloc_rx_page++; - - return true; -} - /** * ixgbevf_alloc_rx_buffers - Replace used receive buffers; packet split * @rx_ring: rx descriptor ring (for a specific queue) to setup buffers on @@ -623,39 +561,33 @@ static void 
ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, u16 cleaned_count) { union ixgbe_adv_rx_desc *rx_desc; - struct ixgbevf_rx_buffer *bi; - unsigned int i = rx_ring->next_to_use; + const struct libeth_fq_fp fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + .truesize = rx_ring->truesize, + .count = rx_ring->count, + }; + u16 ntu = rx_ring->next_to_use; /* nothing to do or no valid netdev defined */ if (!cleaned_count || !rx_ring->netdev) return; - rx_desc = IXGBEVF_RX_DESC(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; - i -= rx_ring->count; + rx_desc = IXGBEVF_RX_DESC(rx_ring, ntu); do { - if (!ixgbevf_alloc_mapped_page(rx_ring, bi)) - break; + dma_addr_t addr = libeth_rx_alloc(&fq, ntu); - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - IXGBEVF_RXBUFFER_3072, - DMA_FROM_DEVICE); + if (addr == DMA_MAPPING_ERROR) + return; - /* Refresh the desc even if pkt_addr didn't change - * because each write-back erases this info. - */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(addr); rx_desc++; - bi++; - i++; - if (unlikely(!i)) { + ntu++; + if (unlikely(ntu == rx_ring->count)) { rx_desc = IXGBEVF_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_buffer_info; - i -= rx_ring->count; + ntu = 0; } /* clear the length for the next_to_use descriptor */ @@ -664,14 +596,9 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, cleaned_count--; } while (cleaned_count); - i += rx_ring->count; - - if (rx_ring->next_to_use != i) { + if (likely(rx_ring->next_to_use != ntu)) { /* record the next descriptor to use */ - rx_ring->next_to_use = i; - - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = i; + rx_ring->next_to_use = ntu; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -679,7 +606,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, * such as IA-64). */ wmb(); - ixgbevf_write_tail(rx_ring, i); + ixgbevf_write_tail(rx_ring, ntu); } } @@ -732,15 +659,15 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, * * This function will add the data contained in rx_buffer->page to the skb. **/ -static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, +static void ixgbevf_add_rx_frag(const struct libeth_fqe *rx_buffer, struct sk_buff *skb, unsigned int size) { - unsigned int truesize = SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size); + u32 hr = netmem_get_pp(rx_buffer->netmem)->p.offset; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); + skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, + rx_buffer->netmem, rx_buffer->offset + hr, + size, rx_buffer->truesize); } static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, @@ -751,38 +678,6 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } -static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) -{ - unsigned int metasize = xdp->data - xdp->data_meta; - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); - struct sk_buff *skb; - - /* Prefetch first cache line of first page. 
If xdp->data_meta - * is unused, this points to xdp->data, otherwise, we likely - * have a consumer accessing first few bytes of meta data, - * and then actual data. - */ - net_prefetch(xdp->data_meta); - - /* build an skb around the page buffer */ - skb = napi_build_skb(xdp->data_hard_start, truesize); - if (unlikely(!skb)) - return NULL; - - /* update pointers within the skb to store the data */ - skb_reserve(skb, xdp->data - xdp->data_hard_start); - __skb_put(skb, xdp->data_end - xdp->data); - if (metasize) - skb_metadata_set(skb, metasize); - - return skb; -} - #define IXGBEVF_XDP_PASS 0 #define IXGBEVF_XDP_CONSUMED 1 #define IXGBEVF_XDP_TX 2 @@ -866,7 +761,7 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring, - struct xdp_buff *xdp) + struct libeth_xdp_buff *xdp) { int result = IXGBEVF_XDP_PASS; struct ixgbevf_ring *xdp_ring; @@ -878,13 +773,13 @@ static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, if (!xdp_prog) goto xdp_out; - act = bpf_prog_run_xdp(xdp_prog, xdp); + act = bpf_prog_run_xdp(xdp_prog, &xdp->base); switch (act) { case XDP_PASS: break; case XDP_TX: xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; - result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp); + result = ixgbevf_xmit_xdp_ring(xdp_ring, &xdp->base); if (result == IXGBEVF_XDP_CONSUMED) goto out_failure; break; @@ -897,6 +792,7 @@ static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: result = IXGBEVF_XDP_CONSUMED; + libeth_xdp_return_buff(xdp); break; } xdp_out: @@ -911,16 +807,15 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); struct sk_buff *skb = rx_ring->skb; + LIBETH_XDP_ONSTACK_BUFF(xdp); bool xdp_xmit = false; - struct xdp_buff xdp; int xdp_res = 0; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ - xdp_init_buff(&xdp, IXGBEVF_RXBUFFER_3072, &rx_ring->xdp_rxq); + xdp->base.rxq = &rx_ring->xdp_rxq; while (likely(total_rx_packets < budget)) { - struct ixgbevf_rx_buffer *rx_buffer; union ixgbe_adv_rx_desc *rx_desc; + struct libeth_fqe *rx_buffer; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -940,18 +835,14 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, */ rmb(); - rx_buffer = - ixgbevf_get_rx_buffer(rx_ring, IXGBEVF_RXBUFFER_3072); + rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; + libeth_rx_sync_for_cpu(rx_buffer, size); /* retrieve a buffer from the ring */ if (!skb) { - unsigned int offset = rx_buffer->page_offset; - unsigned char *hard_start; - - hard_start = page_address(rx_buffer->page) + - rx_buffer->page_offset - offset; - xdp_prepare_buff(&xdp, hard_start, offset, size, true); - xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp); + libeth_xdp_prepare_buff(xdp, rx_buffer, size); + prefetch(xdp->data); + xdp_res = ixgbevf_run_xdp(adapter, rx_ring, xdp); } if (xdp_res) { @@ -961,10 +852,9 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, total_rx_packets++; total_rx_bytes += size; } else if (skb) { - ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); + ixgbevf_add_rx_frag(rx_buffer, skb, size); } else { - skb = ixgbevf_build_skb(rx_ring, rx_buffer, - &xdp, rx_desc); + skb = xdp_build_skb_from_buff(&xdp->base); } /* exit if we failed to retrieve a buffer */ @@ -973,7 +863,6 @@ static int 
ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, break; } - ixgbevf_put_rx_buffer(rx_ring, rx_buffer); cleaned_count++; /* fetch next buffer in frame if non-eop */ @@ -1550,7 +1439,7 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, srrctl = IXGBE_SRRCTL_DROP_EN; srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= ring->rx_buf_len / IXGBE_SRRCTL_BSIZEPKT_STEP; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); @@ -1709,10 +1598,6 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFRDT(reg_idx), 0); ring->tail = adapter->io_addr + IXGBE_VFRDT(reg_idx); - /* initialize rx_buffer_info */ - memset(ring->rx_buffer_info, 0, - sizeof(struct ixgbevf_rx_buffer) * ring->count); - /* initialize Rx descriptor 0 */ rx_desc = IXGBEVF_RX_DESC(ring, 0); rx_desc->wb.upper.length = 0; @@ -1720,14 +1605,17 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; - ring->next_to_alloc = 0; ixgbevf_configure_srrctl(adapter, ring, reg_idx); /* RXDCTL.RLPML does not work on 82599 */ if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { - rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | - IXGBE_RXDCTL_RLPML_EN); + u32 pkt_len = + READ_ONCE(adapter->netdev->mtu) + LIBETH_RX_LL_LEN; + + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | IXGBE_RXDCTL_RLPML_EN); + if (pkt_len <= IXGBE_RXDCTL_RLPMLMASK) + rxdctl |= pkt_len | IXGBE_RXDCTL_RLPML_EN; } rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; @@ -2097,8 +1985,6 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) **/ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) { - u16 i = rx_ring->next_to_clean; - /* Free Rx ring sk_buff */ if (rx_ring->skb) { dev_kfree_skb(rx_ring->skb); @@ -2106,29 +1992,14 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) } /* Free all the Rx ring pages */ - while (i != rx_ring->next_to_alloc) { - struct ixgbevf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_buffer_info[i]; + for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { + const struct libeth_fqe *rx_fqe = &rx_ring->rx_fqes[i]; - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. 
- */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - IXGBEVF_RXBUFFER_3072, - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - ixgbevf_put_rx_buffer(rx_ring, rx_buffer); - __free_page(rx_buffer->page); - i++; - if (i == rx_ring->count) + libeth_rx_recycle_slow(rx_fqe->netmem); + if (unlikely(++i == rx_ring->count)) i = 0; } - rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; } @@ -2147,7 +2018,7 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) /* Free all the Tx ring sk_buffs */ if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); + libeth_xdp_return_va(tx_buffer->data, false); else dev_kfree_skb_any(tx_buffer->skb); @@ -3227,12 +3098,25 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring) { - int size; + struct libeth_fq fq = { + .count = rx_ring->count, + .nid = NUMA_NO_NODE, + .type = LIBETH_FQE_MTU, + .xdp = !!rx_ring->xdp_prog, + .buf_len = IXGBEVF_RX_PAGE_LEN(rx_ring->xdp_prog ? + LIBETH_XDP_HEADROOM : + LIBETH_SKB_HEADROOM), + }; + int ret; - size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vmalloc(size); - if (!rx_ring->rx_buffer_info) - goto err; + ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); + if (ret) + return ret; + + rx_ring->pp = fq.pp; + rx_ring->rx_fqes = fq.fqes; + rx_ring->truesize = fq.truesize; + rx_ring->rx_buf_len = fq.buf_len; u64_stats_init(&rx_ring->syncp); @@ -3240,25 +3124,29 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->size = ALIGN(rx_ring->size, 4096); - rx_ring->desc = dma_alloc_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) goto err; /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, 0) < 0) + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index, 0, fq.buf_len); + if (ret) goto err; + xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, fq.pp); + rx_ring->xdp_prog = adapter->xdp_prog; return 0; err: - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n"); - return -ENOMEM; + return ret; } /** @@ -3299,17 +3187,24 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) **/ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) { + struct libeth_fq fq = { + .fqes = rx_ring->rx_fqes, + .pp = rx_ring->pp, + }; + ixgbevf_clean_rx_ring(rx_ring); rx_ring->xdp_prog = NULL; + xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; - dma_free_coherent(rx_ring->dev, rx_ring->size, rx_ring->desc, + dma_free_coherent(fq.pp->p.dev, rx_ring->size, rx_ring->desc, rx_ring->dma); - rx_ring->desc = NULL; + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; } /** From 9060f71be23a197387c8d16804677f786ca525cb Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 28 Jul 2025 16:52:18 +0200 Subject: [PATCH 04/15] ixgbevf: branch prediction and cleanup Add likely/unlikely markers for better 
branch prediction. While touching some functions, cleanup the code a little bit. This patch is not supposed to make any logic changes. Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 35 +++++++++---------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3e07c8cb7b880f..3f8c25733e0eb3 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -570,7 +570,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, u16 ntu = rx_ring->next_to_use; /* nothing to do or no valid netdev defined */ - if (!cleaned_count || !rx_ring->netdev) + if (unlikely(!cleaned_count || !rx_ring->netdev)) return; rx_desc = IXGBEVF_RX_DESC(rx_ring, ntu); @@ -578,14 +578,14 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, do { dma_addr_t addr = libeth_rx_alloc(&fq, ntu); - if (addr == DMA_MAPPING_ERROR) + if (unlikely(addr == DMA_MAPPING_ERROR)) return; rx_desc->read.pkt_addr = cpu_to_le64(addr); rx_desc++; ntu++; - if (unlikely(ntu == rx_ring->count)) { + if (unlikely(ntu == fq.count)) { rx_desc = IXGBEVF_RX_DESC(rx_ring, 0); ntu = 0; } @@ -594,7 +594,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, rx_desc->wb.upper.length = 0; cleaned_count--; - } while (cleaned_count); + } while (likely(cleaned_count)); if (likely(rx_ring->next_to_use != ntu)) { /* record the next descriptor to use */ @@ -819,14 +819,14 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, unsigned int size; /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) { + if (unlikely(cleaned_count >= IXGBEVF_RX_BUFFER_WRITE)) { ixgbevf_alloc_rx_buffers(rx_ring, cleaned_count); cleaned_count = 0; } rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); size = le16_to_cpu(rx_desc->wb.upper.length); - if (!size) + if (unlikely(!size)) break; /* This memory barrier is needed to keep us from reading @@ -858,7 +858,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, } /* exit if we failed to retrieve a buffer */ - if (!xdp_res && !skb) { + if (unlikely(!xdp_res && !skb)) { rx_ring->rx_stats.alloc_rx_buff_failed++; break; } @@ -870,21 +870,19 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, continue; /* verify the packet layout is correct */ - if (xdp_res || ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) { + if (xdp_res || + unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { skb = NULL; continue; } - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - /* Workaround hardware that can't do proper VEPA multicast * source pruning. 
*/ - if ((skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) && - ether_addr_equal(rx_ring->netdev->dev_addr, - eth_hdr(skb)->h_source)) { + if (unlikely((skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + ether_addr_equal(rx_ring->netdev->dev_addr, + eth_hdr(skb)->h_source))) { dev_kfree_skb_irq(skb); continue; } @@ -892,13 +890,14 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* populate checksum, VLAN, and protocol */ ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; + total_rx_packets++; + ixgbevf_rx_skb(q_vector, skb); /* reset skb pointer */ skb = NULL; - - /* update budget accounting */ - total_rx_packets++; } /* place incomplete frames back on ring for completion */ From fef27ca2e80584c7d66d3908076cb74f2c9febc7 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 2 Sep 2025 16:31:51 +0200 Subject: [PATCH 05/15] ixgbevf: support XDP multi-buffer on Rx path Implement XDP support for received fragmented packets, this requires using some helpers from libeth_xdp. Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/Kconfig | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 3 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 77 +++++-------------- 3 files changed, 22 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 885632e305f3da..a1ee0a3ccd3839 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -202,7 +202,7 @@ config IXGBE_IPSEC config IXGBEVF tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support" depends on PCI_MSI - select LIBETH + select LIBETH_XDP help This driver supports Intel(R) PCI Express virtual functions for the Intel(R) ixgbe driver. For more information on how to identify your diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 37340f067feb55..8b3c8c33f00bfa 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "vf.h" @@ -106,7 +107,6 @@ struct ixgbevf_ring { struct xdp_rxq_info xdp_rxq; u64 hw_csum_rx_error; u8 __iomem *tail; - struct sk_buff *skb; /* holds the special value that gets the hardware register offset * associated with this ring, which is different for DCB and RSS modes @@ -114,6 +114,7 @@ struct ixgbevf_ring { u16 reg_idx; int queue_index; /* needed for multiqueue queue management */ u32 rx_buf_len; + struct libeth_xdp_buff_stash xdp_stash; } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? 
*/ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3f8c25733e0eb3..5bd0685a97093e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -84,6 +84,7 @@ MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl); MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver"); MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_LICENSE("GPL v2"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) @@ -650,26 +651,6 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, return false; } -/** - * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add - * @skb: sk_buff to place the data into - * @size: size of buffer to be added - * - * This function will add the data contained in rx_buffer->page to the skb. - **/ -static void ixgbevf_add_rx_frag(const struct libeth_fqe *rx_buffer, - struct sk_buff *skb, - unsigned int size) -{ - u32 hr = netmem_get_pp(rx_buffer->netmem)->p.offset; - - skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, - rx_buffer->netmem, rx_buffer->offset + hr, - size, rx_buffer->truesize); -} - static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, u32 qmask) { @@ -806,12 +787,12 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); - struct sk_buff *skb = rx_ring->skb; LIBETH_XDP_ONSTACK_BUFF(xdp); bool xdp_xmit = false; + struct sk_buff *skb; int xdp_res = 0; - xdp->base.rxq = &rx_ring->xdp_rxq; + libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -836,42 +817,34 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, rmb(); rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; - libeth_rx_sync_for_cpu(rx_buffer, size); + libeth_xdp_process_buff(xdp, rx_buffer, size); - /* retrieve a buffer from the ring */ - if (!skb) { - libeth_xdp_prepare_buff(xdp, rx_buffer, size); - prefetch(xdp->data); - xdp_res = ixgbevf_run_xdp(adapter, rx_ring, xdp); - } + cleaned_count++; + /* fetch next buffer in frame if non-eop */ + if (ixgbevf_is_non_eop(rx_ring, rx_desc)) + continue; + xdp_res = ixgbevf_run_xdp(adapter, rx_ring, xdp); if (xdp_res) { if (xdp_res == IXGBEVF_XDP_TX) xdp_xmit = true; total_rx_packets++; total_rx_bytes += size; - } else if (skb) { - ixgbevf_add_rx_frag(rx_buffer, skb, size); - } else { - skb = xdp_build_skb_from_buff(&xdp->base); + continue; } + skb = xdp_build_skb_from_buff(&xdp->base); + xdp->data = NULL; + /* exit if we failed to retrieve a buffer */ - if (unlikely(!xdp_res && !skb)) { + if (unlikely(!skb)) { rx_ring->rx_stats.alloc_rx_buff_failed++; break; } - cleaned_count++; - - /* fetch next buffer in frame if non-eop */ - if (ixgbevf_is_non_eop(rx_ring, rx_desc)) - continue; - /* verify the packet layout is correct */ - if (xdp_res || - unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { + if (unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { skb = NULL; continue; } @@ -895,13 +868,10 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, total_rx_packets++; ixgbevf_rx_skb(q_vector, skb); - - /* reset skb pointer */ - skb = NULL; 
} /* place incomplete frames back on ring for completion */ - rx_ring->skb = skb; + libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); if (xdp_xmit) { struct ixgbevf_ring *xdp_ring = @@ -1985,10 +1955,7 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) { /* Free Rx ring sk_buff */ - if (rx_ring->skb) { - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - } + libeth_xdp_return_stash(&rx_ring->xdp_stash); /* Free all the Rx ring pages */ for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { @@ -4067,15 +4034,9 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) { - int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct ixgbevf_adapter *adapter = netdev_priv(dev); struct bpf_prog *old_prog; - /* verify ixgbevf ring attributes are sufficient for XDP */ - for (i = 0; i < adapter->num_rx_queues; i++) - if (frame_size > IXGBEVF_RXBUFFER_3072) - return -EINVAL; - old_prog = xchg(&adapter->xdp_prog, prog); /* If transitioning XDP modes reconfigure rings */ @@ -4093,7 +4054,7 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (netif_running(dev)) ixgbevf_open(dev); } else { - for (i = 0; i < adapter->num_rx_queues; i++) + for (int i = 0; i < adapter->num_rx_queues; i++) xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog); } @@ -4259,7 +4220,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_RX_SG; /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; From 885432256e6e90e46459c397c0239e79a9005026 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 9 Sep 2025 13:46:44 +0200 Subject: [PATCH 06/15] ixgbevf: XDP_TX in multi-buffer through libeth Use libeth to support XDP_TX action for segmented packets. Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 14 +- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 290 ++++++++++++------ 2 files changed, 198 insertions(+), 106 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 8b3c8c33f00bfa..3fa80589b690d9 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -82,19 +82,21 @@ struct ixgbevf_ring { struct net_device *netdev; struct bpf_prog *xdp_prog; union { - struct page_pool *pp; /* Rx ring */ + struct page_pool *pp; /* Rx and XDP rings */ struct device *dev; /* Tx ring */ }; void *desc; /* descriptor ring memory */ - dma_addr_t dma; /* phys. address of descriptor ring */ - unsigned int size; /* length in bytes */ - u32 truesize; /* Rx buffer full size */ + union { + u32 truesize; /* Rx buffer full size */ + u32 pending; /* Sent-not-completed descriptors */ + }; u16 count; /* amount of descriptors */ - u16 next_to_use; u16 next_to_clean; + u32 next_to_use; union { struct ixgbevf_tx_buffer *tx_buffer_info; + struct libeth_sqe *xdp_sqes; struct libeth_fqe *rx_fqes; }; unsigned long state; @@ -115,6 +117,8 @@ struct ixgbevf_ring { int queue_index; /* needed for multiqueue queue management */ u32 rx_buf_len; struct libeth_xdp_buff_stash xdp_stash; + unsigned int dma_size; /* length in bytes */ + dma_addr_t dma; /* phys. 
address of descriptor ring */ } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 5bd0685a97093e..7980579f9646ae 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -306,10 +306,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, total_ipsec++; /* free the skb */ - if (ring_is_xdp(tx_ring)) - libeth_xdp_return_va(tx_buffer->data, true); - else - napi_consume_skb(tx_buffer->skb, napi_budget); + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -392,9 +389,8 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, eop_desc, (eop_desc ? eop_desc->wb.status : 0), tx_ring->tx_buffer_info[i].time_stamp, jiffies); - if (!ring_is_xdp(tx_ring)) - netif_stop_subqueue(tx_ring->netdev, - tx_ring->queue_index); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); /* schedule immediate reset if we believe we hung */ ixgbevf_tx_timeout_reset(adapter); @@ -402,9 +398,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return true; } - if (ring_is_xdp(tx_ring)) - return !!budget; - #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -663,44 +656,83 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, #define IXGBEVF_XDP_CONSUMED 1 #define IXGBEVF_XDP_TX 2 -static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, - struct xdp_buff *xdp) +static void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, + u16 to_clean) +{ + struct libeth_xdpsq_napi_stats stats; + u32 ntc = xdp_ring->next_to_clean; + struct xdp_frame_bulk cbulk; + struct libeth_cq_pp cp = { + .bq = &cbulk, + .dev = xdp_ring->dev, + .xss = &stats, + .napi = in_napi, + }; + + xdp_frame_bulk_init(&cbulk); + xdp_ring->pending -= to_clean; + + while (likely(to_clean--)) { + libeth_xdp_complete_tx(&xdp_ring->xdp_sqes[ntc], &cp); + ntc++; + ntc = unlikely(ntc == xdp_ring->count) ? 0 : ntc; + } + + xdp_ring->next_to_clean = ntc; + xdp_flush_frame_bulk(&cbulk); +} + +static u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *xdp_ring) { - struct ixgbevf_tx_buffer *tx_buffer; - union ixgbe_adv_tx_desc *tx_desc; - u32 len, cmd_type; - dma_addr_t dma; - u16 i; + u16 ntc = xdp_ring->next_to_clean; + u16 to_clean = 0; - len = xdp->data_end - xdp->data; + while (likely(to_clean < xdp_ring->pending)) { + u32 idx = xdp_ring->xdp_sqes[ntc].rs_idx; + union ixgbe_adv_tx_desc *rs_desc; - if (unlikely(!ixgbevf_desc_unused(ring))) - return IXGBEVF_XDP_CONSUMED; + if (!idx--) + break; - dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE); - if (dma_mapping_error(ring->dev, dma)) - return IXGBEVF_XDP_CONSUMED; + rs_desc = IXGBEVF_TX_DESC(xdp_ring, idx); - /* record the location of the first descriptor for this packet */ - i = ring->next_to_use; - tx_buffer = &ring->tx_buffer_info[i]; - - dma_unmap_len_set(tx_buffer, len, len); - dma_unmap_addr_set(tx_buffer, dma, dma); - tx_buffer->data = xdp->data; - tx_buffer->bytecount = len; - tx_buffer->gso_segs = 1; - tx_buffer->protocol = 0; - - /* Populate minimal context descriptor that will provide for the - * fact that we are expected to process Ethernet frames. 
- */ - if (!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state)) { + if (!(rs_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) + break; + + xdp_ring->xdp_sqes[ntc].rs_idx = 0; + + to_clean += + (idx >= ntc ? idx : idx + xdp_ring->count) - ntc + 1; + + ntc = (idx + 1 == xdp_ring->count) ? 0 : idx + 1; + } + + return to_clean; +} + +static void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) +{ + ixgbevf_clean_xdp_num(xdp_ring, false, xdp_ring->pending); +} + +static u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) +{ + struct ixgbevf_ring *xdp_ring = xdpsq; + + if (unlikely(ixgbevf_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { + u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring); + + if (likely(to_clean)) + ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); + } + + if (unlikely(!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, + &xdp_ring->state))) { struct ixgbe_adv_tx_context_desc *context_desc; - set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); + set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &xdp_ring->state); - context_desc = IXGBEVF_TX_CTXTDESC(ring, 0); + context_desc = IXGBEVF_TX_CTXTDESC(xdp_ring, 0); context_desc->vlan_macip_lens = cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); context_desc->fceof_saidx = 0; @@ -709,48 +741,98 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, IXGBE_ADVTXD_DTYP_CTXT); context_desc->mss_l4len_idx = 0; - i = 1; + xdp_ring->next_to_use = 1; + xdp_ring->pending = 1; + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, 1); } - /* put descriptor type bits */ - cmd_type = IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_DEXT | - IXGBE_ADVTXD_DCMD_IFCS; - cmd_type |= len | IXGBE_TXD_CMD; + *sq = (struct libeth_xdpsq) { + .count = xdp_ring->count, + .descs = xdp_ring->desc, + .lock = NULL, + .ntu = &xdp_ring->next_to_use, + .pending = &xdp_ring->pending, + .pool = NULL, + .sqes = xdp_ring->xdp_sqes, + }; + + return ixgbevf_desc_unused(xdp_ring); +} - tx_desc = IXGBEVF_TX_DESC(ring, i); - tx_desc->read.buffer_addr = cpu_to_le64(dma); +static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, + u64 priv) +{ + union ixgbe_adv_tx_desc *tx_desc = + &((union ixgbe_adv_tx_desc *)sq->descs)[i]; - tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); - tx_desc->read.olinfo_status = - cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS | + desc.len; + + if (desc.flags & LIBETH_XDP_TX_LAST) + cmd_type |= IXGBE_TXD_CMD_EOP; + + if (desc.flags & LIBETH_XDP_TX_FIRST) { + struct skb_shared_info *sinfo = sq->sqes[i].sinfo; + u16 full_len = desc.len + sinfo->xdp_frags_size; + + tx_desc->read.olinfo_status = + cpu_to_le32((full_len << IXGBE_ADVTXD_PAYLEN_SHIFT) | IXGBE_ADVTXD_CC); + } - /* Avoid any potential race with cleanup */ - smp_wmb(); + tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); +} - /* set next_to_watch value indicating a packet is present */ - i++; - if (i == ring->count) - i = 0; +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx, ixgbevf_prep_xdp_sq, + ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_END(); - tx_buffer->next_to_watch = tx_desc; - ring->next_to_use = i; +static void ixgbevf_xdp_set_rs(struct ixgbevf_ring *xdp_ring, u32 cached_ntu) +{ + u32 ltu = (xdp_ring->next_to_use ? 
: xdp_ring->count) - 1; + union ixgbe_adv_tx_desc *desc; - return IXGBEVF_XDP_TX; + desc = IXGBEVF_TX_DESC(xdp_ring, ltu); + xdp_ring->xdp_sqes[cached_ntu].rs_idx = ltu + 1; + desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); } -static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *rx_ring, +static void ixgbevf_rx_finalize_xdp(struct libeth_xdp_tx_bulk *tx_bulk, + bool xdp_xmit, u32 cached_ntu) +{ + struct ixgbevf_ring *xdp_ring = tx_bulk->xdpsq; + + if (!xdp_xmit) + goto unlock; + + if (tx_bulk->count) + ixgbevf_xdp_flush_tx(tx_bulk, LIBETH_XDP_TX_DROP); + + ixgbevf_xdp_set_rs(xdp_ring, cached_ntu); + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); +unlock: + rcu_read_unlock(); +} + +static int ixgbevf_run_xdp(struct libeth_xdp_tx_bulk *tx_bulk, struct libeth_xdp_buff *xdp) { int result = IXGBEVF_XDP_PASS; - struct ixgbevf_ring *xdp_ring; - struct bpf_prog *xdp_prog; + const struct bpf_prog *xdp_prog; u32 act; - xdp_prog = READ_ONCE(rx_ring->xdp_prog); - + xdp_prog = tx_bulk->prog; if (!xdp_prog) goto xdp_out; @@ -759,17 +841,14 @@ static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, case XDP_PASS: break; case XDP_TX: - xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; - result = ixgbevf_xmit_xdp_ring(xdp_ring, &xdp->base); - if (result == IXGBEVF_XDP_CONSUMED) - goto out_failure; + result = IXGBEVF_XDP_TX; + libeth_xdp_tx_queue_bulk(tx_bulk, xdp, ixgbevf_xdp_flush_tx); break; default: - bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); + bpf_warn_invalid_xdp_action(tx_bulk->dev, xdp_prog, act); fallthrough; case XDP_ABORTED: -out_failure: - trace_xdp_exception(rx_ring->netdev, xdp_prog, act); + trace_xdp_exception(tx_bulk->dev, xdp_prog, act); fallthrough; /* handle aborts by dropping packet */ case XDP_DROP: result = IXGBEVF_XDP_CONSUMED; @@ -787,12 +866,20 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); + LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); LIBETH_XDP_ONSTACK_BUFF(xdp); + u32 cached_ntu; bool xdp_xmit = false; struct sk_buff *skb; int xdp_res = 0; libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); + libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); + if (xdp_tx_bulk.prog) + cached_ntu = + ((struct ixgbevf_ring *)xdp_tx_bulk.xdpsq)->next_to_use; while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -824,11 +911,12 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, if (ixgbevf_is_non_eop(rx_ring, rx_desc)) continue; - xdp_res = ixgbevf_run_xdp(adapter, rx_ring, xdp); + xdp_res = ixgbevf_run_xdp(&xdp_tx_bulk, xdp); if (xdp_res) { if (xdp_res == IXGBEVF_XDP_TX) xdp_xmit = true; + xdp->data = NULL; total_rx_packets++; total_rx_bytes += size; continue; @@ -873,16 +961,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* place incomplete frames back on ring for completion */ libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); - if (xdp_xmit) { - struct ixgbevf_ring *xdp_ring = - adapter->xdp_ring[rx_ring->queue_index]; - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. 
- */ - wmb(); - ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); - } + ixgbevf_rx_finalize_xdp(&xdp_tx_bulk, xdp_xmit, cached_ntu); u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; @@ -912,6 +991,8 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) bool clean_complete = true; ixgbevf_for_each_ring(ring, q_vector->tx) { + if (ring_is_xdp(ring)) + continue; if (!ixgbevf_clean_tx_irq(q_vector, ring, budget)) clean_complete = false; } @@ -1351,6 +1432,7 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; + ring->pending = 0; /* In order to avoid issues WTHRESH + PTHRESH should always be equal * to or less than the number of on chip descriptors, which is @@ -1363,8 +1445,12 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, 32; /* PTHRESH = 32 */ /* reinitialize tx_buffer_info */ - memset(ring->tx_buffer_info, 0, - sizeof(struct ixgbevf_tx_buffer) * ring->count); + if (!ring_is_xdp(ring)) + memset(ring->tx_buffer_info, 0, + sizeof(struct ixgbevf_tx_buffer) * ring->count); + else + memset(ring->xdp_sqes, 0, + sizeof(struct libeth_sqe) * ring->count); clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); @@ -1983,10 +2069,7 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) union ixgbe_adv_tx_desc *eop_desc, *tx_desc; /* Free all the Tx ring sk_buffs */ - if (ring_is_xdp(tx_ring)) - libeth_xdp_return_va(tx_buffer->data, false); - else - dev_kfree_skb_any(tx_buffer->skb); + dev_kfree_skb_any(tx_buffer->skb); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -2055,7 +2138,7 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_clean_tx_ring(adapter->tx_ring[i]); for (i = 0; i < adapter->num_xdp_queues; i++) - ixgbevf_clean_tx_ring(adapter->xdp_ring[i]); + ixgbevf_clean_xdp_ring(adapter->xdp_ring[i]); } void ixgbevf_down(struct ixgbevf_adapter *adapter) @@ -2944,7 +3027,10 @@ static void ixgbevf_service_task(struct work_struct *work) **/ void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring) { - ixgbevf_clean_tx_ring(tx_ring); + if (!ring_is_xdp(tx_ring)) + ixgbevf_clean_tx_ring(tx_ring); + else + ixgbevf_clean_xdp_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; @@ -2953,7 +3039,7 @@ void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring) if (!tx_ring->desc) return; - dma_free_coherent(tx_ring->dev, tx_ring->size, tx_ring->desc, + dma_free_coherent(tx_ring->dev, tx_ring->dma_size, tx_ring->desc, tx_ring->dma); tx_ring->desc = NULL; @@ -2988,7 +3074,9 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev); int size; - size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count; + size = (!ring_is_xdp(tx_ring) ? 
sizeof(struct ixgbevf_tx_buffer) : + sizeof(struct libeth_sqe)) * tx_ring->count; + tx_ring->tx_buffer_info = vmalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -2996,10 +3084,10 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) u64_stats_init(&tx_ring->syncp); /* round up to nearest 4K */ - tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); - tx_ring->size = ALIGN(tx_ring->size, 4096); + tx_ring->dma_size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); + tx_ring->dma_size = ALIGN(tx_ring->dma_size, 4096); - tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->dma_size, &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) goto err; @@ -3087,10 +3175,10 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); - rx_ring->size = ALIGN(rx_ring->size, 4096); + rx_ring->dma_size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); + rx_ring->dma_size = ALIGN(rx_ring->dma_size, 4096); - rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->size, + rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->dma_size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) goto err; @@ -3164,7 +3252,7 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - dma_free_coherent(fq.pp->p.dev, rx_ring->size, rx_ring->desc, + dma_free_coherent(fq.pp->p.dev, rx_ring->dma_size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; From 88505900a3c1b3c9fce2624ee83cfdb75766a7c8 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 22 Sep 2025 07:14:24 +0200 Subject: [PATCH 07/15] ixgbevf: support XDP_REDIRECT and .ndo_xdp_xmit To fully support XDP_REDIRECT, utilize more libeth helpers in the XDP Rx path and save cached_ntu in the ring structure instead of on the stack. ixgbevf-supported VFs usually have few queues, so use the libeth_xdpsq_lock functionality for XDP queue sharing. Adjust the filling-in of XDP Tx descriptors to use data from the XDP frame. Otherwise, simply use libeth helpers to implement .ndo_xdp_xmit().
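For orientation, the queue-sharing scheme described above boils down to three libeth calls that appear in the diff below; a condensed sketch of their lifecycle (reusing the field names added by this series, with descriptor filling and error handling omitted, so not a buildable function) is:

    /* Sketch only: lifecycle of the shared XDP Tx queue lock, condensed
     * from the hunks below.
     */

    /* ring bring-up: arm the lock only when CPUs outnumber XDP queues */
    libeth_xdpsq_get(&ring->xdpq_lock, ring->netdev,
                     num_possible_cpus() > adapter->num_xdp_queues);

    /* hot path (SQ preparation): serialise access to the shared queue */
    libeth_xdpsq_lock(&xdp_ring->xdpq_lock);
    /* ... fill struct libeth_xdpsq (with .lock = &xdp_ring->xdpq_lock)
     * and let libeth queue the descriptors ...
     */

    /* ring tear-down: drop the lock reference */
    libeth_xdpsq_put(&xdp_ring->xdpq_lock, xdp_ring->netdev);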
Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 2 + .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 140 ++++++++---------- include/net/libeth/xdp.h | 2 +- 3 files changed, 64 insertions(+), 80 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 3fa80589b690d9..26626067e131aa 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -99,6 +99,8 @@ struct ixgbevf_ring { struct libeth_sqe *xdp_sqes; struct libeth_fqe *rx_fqes; }; + struct libeth_xdpsq_lock xdpq_lock; + u32 cached_ntu; unsigned long state; struct ixgbevf_stats stats; struct u64_stats_sync syncp; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 7980579f9646ae..8dc2d84e48eeba 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -652,10 +652,6 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } -#define IXGBEVF_XDP_PASS 0 -#define IXGBEVF_XDP_CONSUMED 1 -#define IXGBEVF_XDP_TX 2 - static void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, u16 to_clean) { @@ -713,12 +709,14 @@ static u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *xdp_ring) static void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) { ixgbevf_clean_xdp_num(xdp_ring, false, xdp_ring->pending); + libeth_xdpsq_put(&xdp_ring->xdpq_lock, xdp_ring->netdev); } static u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) { struct ixgbevf_ring *xdp_ring = xdpsq; + libeth_xdpsq_lock(&xdp_ring->xdpq_lock); if (unlikely(ixgbevf_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring); @@ -752,7 +750,7 @@ static u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) *sq = (struct libeth_xdpsq) { .count = xdp_ring->count, .descs = xdp_ring->desc, - .lock = NULL, + .lock = &xdp_ring->xdpq_lock, .ntu = &xdp_ring->next_to_use, .pending = &xdp_ring->pending, .pool = NULL, @@ -778,9 +776,13 @@ static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, cmd_type |= IXGBE_TXD_CMD_EOP; if (desc.flags & LIBETH_XDP_TX_FIRST) { - struct skb_shared_info *sinfo = sq->sqes[i].sinfo; - u16 full_len = desc.len + sinfo->xdp_frags_size; + struct libeth_sqe *sqe = &sq->sqes[i]; + struct skb_shared_info *sinfo; + u16 full_len; + sinfo = sqe->type == LIBETH_SQE_XDP_TX ? sqe->sinfo : + xdp_get_shared_info_from_frame(sqe->xdpf); + full_len = desc.len + sinfo->xdp_frags_size; tx_desc->read.olinfo_status = cpu_to_le32((full_len << IXGBE_ADVTXD_PAYLEN_SHIFT) | IXGBE_ADVTXD_CC); @@ -790,74 +792,36 @@ static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); } -LIBETH_XDP_DEFINE_START(); -LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx, ixgbevf_prep_xdp_sq, - ixgbevf_xdp_xmit_desc); -LIBETH_XDP_DEFINE_END(); - -static void ixgbevf_xdp_set_rs(struct ixgbevf_ring *xdp_ring, u32 cached_ntu) +static void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) { - u32 ltu = (xdp_ring->next_to_use ? : xdp_ring->count) - 1; + struct ixgbevf_ring *xdp_ring = xdpsq; union ixgbe_adv_tx_desc *desc; + u32 ltu; + + if ((!flush && xdp_ring->pending < xdp_ring->count - 1) || + xdp_ring->cached_ntu == xdp_ring->next_to_use) + return; + ltu = (xdp_ring->next_to_use ? 
: xdp_ring->count) - 1; desc = IXGBEVF_TX_DESC(xdp_ring, ltu); - xdp_ring->xdp_sqes[cached_ntu].rs_idx = ltu + 1; + xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); -} - -static void ixgbevf_rx_finalize_xdp(struct libeth_xdp_tx_bulk *tx_bulk, - bool xdp_xmit, u32 cached_ntu) -{ - struct ixgbevf_ring *xdp_ring = tx_bulk->xdpsq; - - if (!xdp_xmit) - goto unlock; - - if (tx_bulk->count) - ixgbevf_xdp_flush_tx(tx_bulk, LIBETH_XDP_TX_DROP); - - ixgbevf_xdp_set_rs(xdp_ring, cached_ntu); + xdp_ring->cached_ntu = xdp_ring->next_to_use; /* Finish descriptor writes before bumping tail */ wmb(); ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); -unlock: - rcu_read_unlock(); } -static int ixgbevf_run_xdp(struct libeth_xdp_tx_bulk *tx_bulk, - struct libeth_xdp_buff *xdp) -{ - int result = IXGBEVF_XDP_PASS; - const struct bpf_prog *xdp_prog; - u32 act; - - xdp_prog = tx_bulk->prog; - if (!xdp_prog) - goto xdp_out; - - act = bpf_prog_run_xdp(xdp_prog, &xdp->base); - switch (act) { - case XDP_PASS: - break; - case XDP_TX: - result = IXGBEVF_XDP_TX; - libeth_xdp_tx_queue_bulk(tx_bulk, xdp, ixgbevf_xdp_flush_tx); - break; - default: - bpf_warn_invalid_xdp_action(tx_bulk->dev, xdp_prog, act); - fallthrough; - case XDP_ABORTED: - trace_xdp_exception(tx_bulk->dev, xdp_prog, act); - fallthrough; /* handle aborts by dropping packet */ - case XDP_DROP: - result = IXGBEVF_XDP_CONSUMED; - libeth_xdp_return_buff(xdp); - break; - } -xdp_out: - return result; -} +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx, ixgbevf_prep_xdp_sq, + ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_FLUSH_XMIT(static ixgbevf_xdp_flush_xmit, ixgbevf_prep_xdp_sq, + ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_RUN_PROG(static ixgbevf_xdp_run_prog, ixgbevf_xdp_flush_tx); +LIBETH_XDP_DEFINE_FINALIZE(static ixgbevf_xdp_finalize_xdp_napi, + ixgbevf_xdp_flush_tx, ixgbevf_xdp_rs_and_bump); +LIBETH_XDP_DEFINE_END(); static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, @@ -868,18 +832,12 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, u16 cleaned_count = ixgbevf_desc_unused(rx_ring); LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); LIBETH_XDP_ONSTACK_BUFF(xdp); - u32 cached_ntu; - bool xdp_xmit = false; struct sk_buff *skb; - int xdp_res = 0; libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, adapter->netdev, adapter->xdp_ring, adapter->num_xdp_queues); - if (xdp_tx_bulk.prog) - cached_ntu = - ((struct ixgbevf_ring *)xdp_tx_bulk.xdpsq)->next_to_use; while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; @@ -911,11 +869,8 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, if (ixgbevf_is_non_eop(rx_ring, rx_desc)) continue; - xdp_res = ixgbevf_run_xdp(&xdp_tx_bulk, xdp); - if (xdp_res) { - if (xdp_res == IXGBEVF_XDP_TX) - xdp_xmit = true; - + if (xdp_tx_bulk.prog && + !ixgbevf_xdp_run_prog(xdp, &xdp_tx_bulk)) { xdp->data = NULL; total_rx_packets++; total_rx_bytes += size; @@ -961,7 +916,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* place incomplete frames back on ring for completion */ libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); - ixgbevf_rx_finalize_xdp(&xdp_tx_bulk, xdp_xmit, cached_ntu); + ixgbevf_xdp_finalize_xdp_napi(&xdp_tx_bulk); u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; @@ -973,6 +928,23 @@ static int 
ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, return total_rx_packets; } +static int ixgbevf_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct ixgbevf_adapter *adapter = netdev_priv(dev); + + if (test_bit(__IXGBEVF_DOWN, &adapter->state)) + return -ENETDOWN; + + if (!adapter->num_xdp_queues) + return -ENXIO; + + return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, adapter->xdp_ring, + adapter->num_xdp_queues, + ixgbevf_xdp_flush_xmit, + ixgbevf_xdp_rs_and_bump); +} + /** * ixgbevf_poll - NAPI polling calback * @napi: napi struct with our devices info in it @@ -1433,6 +1405,7 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, ring->next_to_clean = 0; ring->next_to_use = 0; ring->pending = 0; + ring->cached_ntu = 0; /* In order to avoid issues WTHRESH + PTHRESH should always be equal * to or less than the number of on chip descriptors, which is @@ -1445,12 +1418,15 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, 32; /* PTHRESH = 32 */ /* reinitialize tx_buffer_info */ - if (!ring_is_xdp(ring)) + if (!ring_is_xdp(ring)) { memset(ring->tx_buffer_info, 0, sizeof(struct ixgbevf_tx_buffer) * ring->count); - else + } else { memset(ring->xdp_sqes, 0, sizeof(struct libeth_sqe) * ring->count); + libeth_xdpsq_get(&ring->xdpq_lock, ring->netdev, + num_possible_cpus() > adapter->num_xdp_queues); + } clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); @@ -4126,6 +4102,7 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) struct bpf_prog *old_prog; old_prog = xchg(&adapter->xdp_prog, prog); + xdp_features_clear_redirect_target(dev); /* If transitioning XDP modes reconfigure rings */ if (!!prog != !!old_prog) { @@ -4149,6 +4126,9 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (old_prog) bpf_prog_put(old_prog); + if (prog) + xdp_features_set_redirect_target(dev, true); + return 0; } @@ -4176,6 +4156,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_vlan_rx_kill_vid = ixgbevf_vlan_rx_kill_vid, .ndo_features_check = ixgbevf_features_check, .ndo_bpf = ixgbevf_xdp, + .ndo_xdp_xmit = ixgbevf_xdp_xmit, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev) @@ -4308,7 +4289,8 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_RX_SG; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_RX_SG | + NETDEV_XDP_ACT_REDIRECT; /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index f4880b50e804c2..add05f351207d3 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -1094,7 +1094,7 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, * @xqs: array of XDPSQs driver structs * @nqs: number of active XDPSQs, the above array length * @fl: driver callback to flush an XDP xmit bulk - * @fin: driver cabback to finalize the queue + * @fin: driver callback to finalize the queue * * If the driver has active XDPSQs, perform common checks and send the frames. * Finalize the queue, if requested. 
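Since the RS/DD completion bookkeeping is spread across the previous two patches, a short worked example with made-up index values (no new code, just the arithmetic already implemented above) may help:

    /*
     * count = 512, next_to_clean = cached_ntu = 10; a NAPI poll queues
     * descriptors 10..15 through the flush callback, so next_to_use
     * becomes 16. At finalize time, ixgbevf_xdp_rs_and_bump() computes
     * ltu = 15, sets RS on descriptor 15, stores xdp_sqes[10].rs_idx = 16
     * (biased by one so that 0 can mean "no RS descriptor recorded here")
     * and advances cached_ntu to 16.
     *
     * Later, ixgbevf_tx_get_num_sent() runs with ntc = 10, reads
     * rs_idx = 16 and hence idx = 15; once descriptor 15 reports DD,
     * descriptors 10..15 are complete: to_clean += 15 - 10 + 1 = 6 and
     * ntc moves to 16.
     *
     * The "idx + count" branch covers wrap-around: with ntc = 510 and
     * idx = 2, to_clean += (2 + 512) - 510 + 1 = 5, i.e. descriptors
     * 510, 511, 0, 1 and 2.
     */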
From 3df2b57d527cc0ed6be5ee9f4d090490157df433 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Wed, 22 Oct 2025 18:19:46 +0200 Subject: [PATCH 08/15] ixgbevf: add a helper to flush Tx queue The same register write operation is already used twice in the code and will be used again by the AF_XDP configuration path. Wrap it in a helper function. Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 8dc2d84e48eeba..8165f8b02fd9aa 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2117,10 +2117,17 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) ixgbevf_clean_xdp_ring(adapter->xdp_ring[i]); } +static void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring) +{ + u8 reg_idx = ring->reg_idx; + + IXGBE_WRITE_REG(&ring->q_vector->adapter->hw, IXGBE_VFTXDCTL(reg_idx), + IXGBE_TXDCTL_SWFLSH); +} + void ixgbevf_down(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct ixgbe_hw *hw = &adapter->hw; int i; /* signal that we are down to the interrupt handler */ @@ -2146,19 +2153,11 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) timer_delete_sync(&adapter->service_timer); /* disable transmits in the hardware now that interrupts are off */ - for (i = 0; i < adapter->num_tx_queues; i++) { - u8 reg_idx = adapter->tx_ring[i]->reg_idx; - - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), - IXGBE_TXDCTL_SWFLSH); - } - - for (i = 0; i < adapter->num_xdp_queues; i++) { - u8 reg_idx = adapter->xdp_ring[i]->reg_idx; + for (i = 0; i < adapter->num_tx_queues; i++) + ixgbevf_flush_tx_queue(adapter->tx_ring[i]); - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), - IXGBE_TXDCTL_SWFLSH); - } + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_flush_tx_queue(adapter->xdp_ring[i]); if (!pci_channel_offline(adapter->pdev)) ixgbevf_reset(adapter); From 76e6cc9d425d06901d796fdeffc4da56b7ca52c6 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 24 Oct 2025 12:19:51 +0200 Subject: [PATCH 09/15] ixgbevf: move skb-filling code to a header The AF_XDP ZC Rx path also needs to implement skb creation. Move all the common functions to a header file as inlines.
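To make the intended reuse concrete: any Rx path that has produced an skb (the regular page-pool path today, the AF_XDP ZC path added later in this series) is expected to finish it with the same inline sequence. A minimal sketch, using the helpers moved by this patch plus the plain napi_gro_receive() call that replaces ixgbevf_rx_skb():

    /* Sketch: common skb tail handling shared through ixgbevf_txrx_lib.h.
     * "skb" is assumed to have been built by the caller (e.g. from an XDP
     * buffer); this is not a complete Rx routine.
     */
    if (ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))
        return;                 /* bad frame, skb has already been freed */

    ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); /* hash, csum, VLAN */
    napi_gro_receive(&q_vector->napi, skb);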
Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 193 +----------------- .../ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h | 190 +++++++++++++++++ 2 files changed, 192 insertions(+), 191 deletions(-) create mode 100644 drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 8165f8b02fd9aa..ea696049ca5e59 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -33,7 +33,7 @@ #include #include -#include "ixgbevf.h" +#include "ixgbevf_txrx_lib.h" const char ixgbevf_driver_name[] = "ixgbevf"; static const char ixgbevf_driver_string[] = @@ -418,134 +418,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return !!budget; } -/** - * ixgbevf_rx_skb - Helper function to determine proper Rx method - * @q_vector: structure containing interrupt and ring information - * @skb: packet to send up - **/ -static void ixgbevf_rx_skb(struct ixgbevf_q_vector *q_vector, - struct sk_buff *skb) -{ - napi_gro_receive(&q_vector->napi, skb); -} - -#define IXGBE_RSS_L4_TYPES_MASK \ - ((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP)) - -static inline void ixgbevf_rx_hash(struct ixgbevf_ring *ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - u16 rss_type; - - if (!(ring->netdev->features & NETIF_F_RXHASH)) - return; - - rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & - IXGBE_RXDADV_RSSTYPE_MASK; - - if (!rss_type) - return; - - skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? - PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); -} - -/** - * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum - * @ring: structure containig ring specific data - * @rx_desc: current Rx descriptor being processed - * @skb: skb currently being received and modified - **/ -static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - skb_checksum_none_assert(skb); - - /* Rx csum disabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) - return; - - /* if IP and error */ - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) && - ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) { - ring->rx_stats.csum_err++; - return; - } - - if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) - return; - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { - ring->rx_stats.csum_err++; - return; - } - - /* It must be a TCP or UDP packet with a valid checksum */ - skb->ip_summed = CHECKSUM_UNNECESSARY; -} - -/** - * ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated - * - * This function checks the ring, descriptor, and packet information in - * order to populate the checksum, VLAN, protocol, and other fields within - * the skb. 
- **/ -static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - ixgbevf_rx_hash(rx_ring, rx_desc, skb); - ixgbevf_rx_checksum(rx_ring, rx_desc, skb); - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { - u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); - unsigned long *active_vlans = netdev_priv(rx_ring->netdev); - - if (test_bit(vid & VLAN_VID_MASK, active_vlans)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); - } - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) - ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); -} - -/** - * ixgbevf_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * - * This function updates next to clean. If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. - **/ -static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc) -{ - u32 ntc = rx_ring->next_to_clean + 1; - - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - - prefetch(IXGBEVF_RX_DESC(rx_ring, ntc)); - - if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - return false; - - return true; -} - /** * ixgbevf_alloc_rx_buffers - Replace used receive buffers; packet split * @rx_ring: rx descriptor ring (for a specific queue) to setup buffers on @@ -604,46 +476,6 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, } } -/** - * ixgbevf_cleanup_headers - Correct corrupted or empty headers - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being fixed - * - * Check for corrupted packet headers caused by senders on the local L2 - * embedded NIC switch not setting up their Tx Descriptors right. These - * should be very rare. - * - * Also address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. - * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. - * - * Returns true if an error was encountered and skb was freed. - **/ -static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - /* verify that the packet does not have any known errors */ - if (unlikely(ixgbevf_test_staterr(rx_desc, - IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { - struct net_device *netdev = rx_ring->netdev; - - if (!(netdev->features & NETIF_F_RXALL)) { - dev_kfree_skb_any(skb); - return true; - } - } - - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; - - return false; -} - static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, u32 qmask) { @@ -792,27 +624,6 @@ static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); } -static void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) -{ - struct ixgbevf_ring *xdp_ring = xdpsq; - union ixgbe_adv_tx_desc *desc; - u32 ltu; - - if ((!flush && xdp_ring->pending < xdp_ring->count - 1) || - xdp_ring->cached_ntu == xdp_ring->next_to_use) - return; - - ltu = (xdp_ring->next_to_use ? 
: xdp_ring->count) - 1; - desc = IXGBEVF_TX_DESC(xdp_ring, ltu); - xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; - desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); - xdp_ring->cached_ntu = xdp_ring->next_to_use; - - /* Finish descriptor writes before bumping tail */ - wmb(); - ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); -} - LIBETH_XDP_DEFINE_START(); LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx, ixgbevf_prep_xdp_sq, ixgbevf_xdp_xmit_desc); @@ -910,7 +721,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, total_rx_bytes += skb->len; total_rx_packets++; - ixgbevf_rx_skb(q_vector, skb); + napi_gro_receive(&q_vector->napi, skb); } /* place incomplete frames back on ring for completion */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h new file mode 100644 index 00000000000000..3d37e9588eb516 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 1999 - 2025 Intel Corporation. */ + +#ifndef _IXGBEVF_TXRX_LIB_H_ +#define _IXGBEVF_TXRX_LIB_H_ + +#include + +#include "ixgbevf.h" + +static inline void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) +{ + struct ixgbevf_ring *xdp_ring = xdpsq; + union ixgbe_adv_tx_desc *desc; + u32 ltu; + + if ((!flush && xdp_ring->pending < xdp_ring->count - 1) || + xdp_ring->cached_ntu == xdp_ring->next_to_use) + return; + + ltu = (xdp_ring->next_to_use ? : xdp_ring->count) - 1; + desc = IXGBEVF_TX_DESC(xdp_ring, ltu); + xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; + desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); + xdp_ring->cached_ntu = xdp_ring->next_to_use; + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); +} + +/** + * ixgbevf_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static inline bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + rx_ring->pending++; + + prefetch(IXGBEVF_RX_DESC(rx_ring, ntc)); + + if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + + return true; +} + +/** + * ixgbevf_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Check for corrupted packet headers caused by senders on the local L2 + * embedded NIC switch not setting up their Tx Descriptors right. These + * should be very rare. + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. 
+ **/ +static inline bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + /* verify that the packet does not have any known errors */ + if (unlikely(ixgbevf_test_staterr(rx_desc, + IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { + struct net_device *netdev = rx_ring->netdev; + + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; + } + } + + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; + + return false; +} + +#define IXGBE_RSS_L4_TYPES_MASK \ + ((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP)) + +static inline void ixgbevf_rx_hash(struct ixgbevf_ring *ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u16 rss_type; + + if (!(ring->netdev->features & NETIF_F_RXHASH)) + return; + + rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & + IXGBE_RXDADV_RSSTYPE_MASK; + + if (!rss_type) + return; + + skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? + PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); +} + +/** + * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum + * @ring: structure containing ring specific data + * @rx_desc: current Rx descriptor being processed + * @skb: skb currently being received and modified + **/ +static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + /* Rx csum disabled */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + /* if IP and error */ + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) && + ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) { + ring->rx_stats.csum_err++; + return; + } + + if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) + return; + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { + ring->rx_stats.csum_err++; + return; + } + + /* It must be a TCP or UDP packet with a valid checksum */ + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + +/** + * ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the checksum, VLAN, protocol, and other fields within + * the skb. 
+ **/ +static inline void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + ixgbevf_rx_hash(rx_ring, rx_desc, skb); + ixgbevf_rx_checksum(rx_ring, rx_desc, skb); + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { + u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); + unsigned long *active_vlans = netdev_priv(rx_ring->netdev); + + if (test_bit(vid & VLAN_VID_MASK, active_vlans)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) + ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); +} + +#endif /* _IXGBEVF_TXRX_LIB_H_ */ From 35187df2aa76b0ea99540eec5622f0c7e4cb4d0c Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 24 Oct 2025 14:42:09 +0200 Subject: [PATCH 10/15] ixgbevf: move XDP queue management code to a header Plenty of code can be shared between ZC and normal XDP Tx queues. Expose such code through the previously added header file. Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 80 +------------------ .../ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h | 79 ++++++++++++++++++ 2 files changed, 81 insertions(+), 78 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index ea696049ca5e59..18eff6d7c3b115 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -484,8 +484,8 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } -static void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, - u16 to_clean) +void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, + u16 to_clean) { struct libeth_xdpsq_napi_stats stats; u32 ntc = xdp_ring->next_to_clean; @@ -510,88 +510,12 @@ static void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, xdp_flush_frame_bulk(&cbulk); } -static u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *xdp_ring) -{ - u16 ntc = xdp_ring->next_to_clean; - u16 to_clean = 0; - - while (likely(to_clean < xdp_ring->pending)) { - u32 idx = xdp_ring->xdp_sqes[ntc].rs_idx; - union ixgbe_adv_tx_desc *rs_desc; - - if (!idx--) - break; - - rs_desc = IXGBEVF_TX_DESC(xdp_ring, idx); - - if (!(rs_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) - break; - - xdp_ring->xdp_sqes[ntc].rs_idx = 0; - - to_clean += - (idx >= ntc ? idx : idx + xdp_ring->count) - ntc + 1; - - ntc = (idx + 1 == xdp_ring->count) ? 
0 : idx + 1; - } - - return to_clean; -} - static void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) { ixgbevf_clean_xdp_num(xdp_ring, false, xdp_ring->pending); libeth_xdpsq_put(&xdp_ring->xdpq_lock, xdp_ring->netdev); } -static u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) -{ - struct ixgbevf_ring *xdp_ring = xdpsq; - - libeth_xdpsq_lock(&xdp_ring->xdpq_lock); - if (unlikely(ixgbevf_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { - u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring); - - if (likely(to_clean)) - ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); - } - - if (unlikely(!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, - &xdp_ring->state))) { - struct ixgbe_adv_tx_context_desc *context_desc; - - set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &xdp_ring->state); - - context_desc = IXGBEVF_TX_CTXTDESC(xdp_ring, 0); - context_desc->vlan_macip_lens = - cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); - context_desc->fceof_saidx = 0; - context_desc->type_tucmd_mlhl = - cpu_to_le32(IXGBE_TXD_CMD_DEXT | - IXGBE_ADVTXD_DTYP_CTXT); - context_desc->mss_l4len_idx = 0; - - xdp_ring->next_to_use = 1; - xdp_ring->pending = 1; - - /* Finish descriptor writes before bumping tail */ - wmb(); - ixgbevf_write_tail(xdp_ring, 1); - } - - *sq = (struct libeth_xdpsq) { - .count = xdp_ring->count, - .descs = xdp_ring->desc, - .lock = &xdp_ring->xdpq_lock, - .ntu = &xdp_ring->next_to_use, - .pending = &xdp_ring->pending, - .pool = NULL, - .sqes = xdp_ring->xdp_sqes, - }; - - return ixgbevf_desc_unused(xdp_ring); -} - static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, const struct libeth_xdpsq *sq, u64 priv) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h index 3d37e9588eb516..4d4864579e5d14 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h @@ -187,4 +187,83 @@ static inline void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); } +static inline u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean; + u16 to_clean = 0; + + while (likely(to_clean < xdp_ring->pending)) { + u32 idx = xdp_ring->xdp_sqes[ntc].rs_idx; + union ixgbe_adv_tx_desc *rs_desc; + + if (!idx--) + break; + + rs_desc = IXGBEVF_TX_DESC(xdp_ring, idx); + + if (!(rs_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) + break; + + xdp_ring->xdp_sqes[ntc].rs_idx = 0; + + to_clean += + (idx >= ntc ? idx : idx + xdp_ring->count) - ntc + 1; + + ntc = (idx + 1 == xdp_ring->count) ? 
0 : idx + 1; + } + + return to_clean; +} + +void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, + u16 to_clean); + +static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) +{ + struct ixgbevf_ring *xdp_ring = xdpsq; + + libeth_xdpsq_lock(&xdp_ring->xdpq_lock); + if (unlikely(ixgbevf_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { + u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring); + + if (likely(to_clean)) + ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); + } + + if (unlikely(!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, + &xdp_ring->state))) { + struct ixgbe_adv_tx_context_desc *context_desc; + + set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &xdp_ring->state); + + context_desc = IXGBEVF_TX_CTXTDESC(xdp_ring, 0); + context_desc->vlan_macip_lens = + cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); + context_desc->fceof_saidx = 0; + context_desc->type_tucmd_mlhl = + cpu_to_le32(IXGBE_TXD_CMD_DEXT | + IXGBE_ADVTXD_DTYP_CTXT); + context_desc->mss_l4len_idx = 0; + + xdp_ring->next_to_use = 1; + xdp_ring->pending = 1; + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, 1); + } + + *sq = (struct libeth_xdpsq) { + .count = xdp_ring->count, + .descs = xdp_ring->desc, + .lock = &xdp_ring->xdpq_lock, + .ntu = &xdp_ring->next_to_use, + .pending = &xdp_ring->pending, + .pool = NULL, + .sqes = xdp_ring->xdp_sqes, + }; + + return ixgbevf_desc_unused(xdp_ring); +} + +#endif /* _IXGBEVF_TXRX_LIB_H_ */ From 03a92107920bbecd5d5d83a9e02582f01db76dd7 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Fri, 24 Oct 2025 14:47:35 +0200 Subject: [PATCH 11/15] ixgbevf: handle single context descriptor on an XDP queue Before starting transmission, the XDP queue first fills a single context descriptor, on which we cannot check the DD bit later. This is not a problem in the case of XDP_TX and .ndo_xdp_xmit(), because preparation happens only if we already have packets to send. This is different for ZC, though: a wakeup must trigger queue preparation even if no new packets are queued, so a single context descriptor can block completions. Modify the RS-setting logic to handle such a case. Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h index 4d4864579e5d14..d8a8eb189ac67c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h @@ -19,9 +19,15 @@ static inline void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) return; ltu = (xdp_ring->next_to_use ?
: xdp_ring->count) - 1; + + /* We will not get DD on a context descriptor */ + if (unlikely(xdp_ring->xdp_sqes[ltu].type == LIBETH_SQE_CTX)) + return; + desc = IXGBEVF_TX_DESC(xdp_ring, ltu); - xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); + + xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; xdp_ring->cached_ntu = xdp_ring->next_to_use; /* Finish descriptor writes before bumping tail */ @@ -247,6 +253,7 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) xdp_ring->next_to_use = 1; xdp_ring->pending = 1; + xdp_ring->xdp_sqes[0].type = LIBETH_SQE_CTX; /* Finish descriptor writes before bumping tail */ wmb(); From bfb25e46098bf8bacd659e324694683b711a6f57 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 27 Oct 2025 13:32:49 +0100 Subject: [PATCH 12/15] ixgbevf: implement AF_XDP ZC initialization Implement xsk_buff_pool configuration and supporting functionality, such as a single queue pair reconfiguration. Also, properly initialize Rx buffers. Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/Makefile | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 33 +++- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 182 +++++++++++++----- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 129 +++++++++++++ .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.h | 12 ++ 5 files changed, 307 insertions(+), 51 deletions(-) create mode 100644 drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c create mode 100644 drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile index 01d3e892f3fa7b..cdae62f25fd926 100644 --- a/drivers/net/ethernet/intel/ixgbevf/Makefile +++ b/drivers/net/ethernet/intel/ixgbevf/Makefile @@ -6,5 +6,5 @@ obj-$(CONFIG_IXGBEVF) += ixgbevf.o -ixgbevf-y := vf.o mbx.o ethtool.o ixgbevf_main.o +ixgbevf-y := vf.o mbx.o ethtool.o ixgbevf_main.o ixgbevf_xsk.o ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 26626067e131aa..44e6b2537812fa 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -67,6 +67,7 @@ enum ixgbevf_ring_state_t { __IXGBEVF_HANG_CHECK_ARMED, __IXGBEVF_TX_XDP_RING, __IXGBEVF_TX_XDP_RING_PRIMED, + __IXGBEVF_RXTX_XSK_RING, }; #define ring_is_xdp(ring) \ @@ -76,6 +77,13 @@ enum ixgbevf_ring_state_t { #define clear_ring_xdp(ring) \ clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) +#define ring_is_xsk(ring) \ + test_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) +#define set_ring_xsk(ring) \ + set_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) +#define clear_ring_xsk(ring) \ + clear_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) + struct ixgbevf_ring { struct ixgbevf_ring *next; struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */ @@ -86,21 +94,21 @@ struct ixgbevf_ring { struct device *dev; /* Tx ring */ }; void *desc; /* descriptor ring memory */ - union { - u32 truesize; /* Rx buffer full size */ - u32 pending; /* Sent-not-completed descriptors */ - }; + u32 truesize; /* Rx buffer full size */ u16 count; /* amount of descriptors */ u16 next_to_clean; u32 next_to_use; + u32 pending; /* Sent-not-completed descriptors */ union { struct ixgbevf_tx_buffer *tx_buffer_info; struct libeth_sqe *xdp_sqes; struct libeth_fqe *rx_fqes; + struct libeth_xdp_buff **xsk_fqes; }; struct libeth_xdpsq_lock xdpq_lock; u32 
cached_ntu; + u32 thresh; unsigned long state; struct ixgbevf_stats stats; struct u64_stats_sync syncp; @@ -119,8 +127,10 @@ struct ixgbevf_ring { int queue_index; /* needed for multiqueue queue management */ u32 rx_buf_len; struct libeth_xdp_buff_stash xdp_stash; + struct libeth_xdp_buff *xsk_xdp_head; unsigned int dma_size; /* length in bytes */ dma_addr_t dma; /* phys. address of descriptor ring */ + struct xsk_buff_pool *xsk_pool; /* AF_XDP ZC rings */ } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? */ @@ -402,14 +412,29 @@ int ixgbevf_open(struct net_device *netdev); int ixgbevf_close(struct net_device *netdev); void ixgbevf_up(struct ixgbevf_adapter *adapter); void ixgbevf_down(struct ixgbevf_adapter *adapter); +void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring); +void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); +void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter); void ixgbevf_reset(struct ixgbevf_adapter *adapter); void ixgbevf_set_ethtool_ops(struct net_device *netdev); int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring); +void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter); +void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); +int ixgbevf_setup_fq(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring); int ixgbevf_setup_tx_resources(struct ixgbevf_ring *); +void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); void ixgbevf_free_rx_resources(struct ixgbevf_ring *); +void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring); +void ixgbevf_rx_fq_destroy(struct ixgbevf_ring *rx_ring); void ixgbevf_free_tx_resources(struct ixgbevf_ring *); +void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring); void ixgbevf_update_stats(struct ixgbevf_adapter *adapter); int ethtool_ioctl(struct ifreq *ifr); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 18eff6d7c3b115..c75fc732bcb8cf 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -31,9 +31,10 @@ #include #include #include -#include +#include #include "ixgbevf_txrx_lib.h" +#include "ixgbevf_xsk.h" const char ixgbevf_driver_name[] = "ixgbevf"; static const char ixgbevf_driver_string[] = @@ -1088,7 +1089,7 @@ static inline void ixgbevf_irq_disable(struct ixgbevf_adapter *adapter) * ixgbevf_irq_enable - Enable default interrupt generation settings * @adapter: board private structure **/ -static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) +void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -1104,8 +1105,8 @@ static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) * * Configure the Tx descriptor ring after a reset. 
**/ -static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; u64 tdba = ring->dma; @@ -1163,6 +1164,13 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, num_possible_cpus() > adapter->num_xdp_queues); } + ring->xsk_pool = + xsk_get_pool_from_qid(adapter->netdev, ring->queue_index); + if (ring_is_xdp(ring) && ring->xsk_pool) + set_ring_xsk(ring); + else + clear_ring_xsk(ring); + clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); @@ -1227,8 +1235,8 @@ static void ixgbevf_setup_psrtype(struct ixgbevf_adapter *adapter) } #define IXGBEVF_MAX_RX_DESC_POLL 10 -static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; int wait_loop = IXGBEVF_MAX_RX_DESC_POLL; @@ -1254,8 +1262,8 @@ static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, reg_idx); } -static void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; int wait_loop = IXGBEVF_MAX_RX_DESC_POLL; @@ -1331,8 +1339,8 @@ static void ixgbevf_setup_vfmrqc(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VFMRQC, vfmrqc); } -static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; union ixgbe_adv_rx_desc *rx_desc; @@ -1371,6 +1379,7 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; + ring->pending = ixgbevf_desc_unused(ring); ixgbevf_configure_srrctl(adapter, ring, reg_idx); @@ -1388,7 +1397,11 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl); ixgbevf_rx_desc_queue_enable(adapter, ring); - ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring)); + + if (ring_is_xsk(ring)) + ixgbevf_xsk_alloc_rx_bufs(ring, ring->pending); + else + ixgbevf_alloc_rx_buffers(ring, ring->pending); } /** @@ -1749,8 +1762,13 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) * ixgbevf_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ -static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) +void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) { + if (ring_is_xsk(rx_ring)) { + ixgbevf_rx_xsk_ring_free_buffs(rx_ring); + goto reset; + } + /* Free Rx ring sk_buff */ libeth_xdp_return_stash(&rx_ring->xdp_stash); @@ -1763,15 +1781,17 @@ static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) i = 0; } +reset: rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + rx_ring->pending = 0; } /** * ixgbevf_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned **/ -static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) +void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) { u16 i = tx_ring->next_to_clean; struct ixgbevf_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; @@ -1852,7 +1872,7 @@ static void 
ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) ixgbevf_clean_xdp_ring(adapter->xdp_ring[i]); } -static void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring) +void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring) { u8 reg_idx = ring->reg_idx; @@ -2852,15 +2872,36 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) return err; } -/** - * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors) - * @adapter: board private structure - * @rx_ring: Rx descriptor ring (for a specific queue) to setup - * - * Returns 0 on success, negative on failure - **/ -int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *rx_ring) +void ixgbevf_rx_fq_destroy(struct ixgbevf_ring *rx_ring) +{ + xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + + if (test_and_clear_bit(__IXGBEVF_RXTX_XSK_RING, &rx_ring->state)) { + struct libeth_xskfq xskfq = { + .fqes = rx_ring->xsk_fqes, + }; + + libeth_xskfq_destroy(&xskfq); + rx_ring->xsk_fqes = NULL; + rx_ring->pending = xskfq.pending; + rx_ring->thresh = xskfq.thresh; + rx_ring->rx_buf_len = xskfq.buf_len; + rx_ring->xsk_pool = NULL; + } else { + struct libeth_fq fq = { + .fqes = rx_ring->rx_fqes, + .pp = rx_ring->pp, + }; + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; + } +} + +int ixgbevf_setup_fq(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring) { struct libeth_fq fq = { .count = rx_ring->count, @@ -2871,8 +2912,42 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, LIBETH_XDP_HEADROOM : LIBETH_SKB_HEADROOM), }; + struct xsk_buff_pool *pool; int ret; + pool = xsk_get_pool_from_qid(rx_ring->netdev, rx_ring->queue_index); + if (adapter->xdp_prog && pool) { + struct libeth_xskfq xskfq = { + .nid = numa_node_id(), + .count = rx_ring->count, + .pool = pool, + }; + + ret = libeth_xskfq_create(&xskfq); + if (ret) + return ret; + + rx_ring->xsk_pool = xskfq.pool; + rx_ring->xsk_fqes = xskfq.fqes; + rx_ring->pending = xskfq.pending; + rx_ring->thresh = xskfq.thresh; + rx_ring->rx_buf_len = xskfq.buf_len; + set_ring_xsk(rx_ring); + + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index, 0, fq.buf_len); + if (ret) + goto fq_destroy; + + ret = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + rx_ring->xsk_pool); + if (ret) + goto fq_destroy; + + return 0; + } + ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); if (ret) return ret; @@ -2882,33 +2957,54 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, rx_ring->truesize = fq.truesize; rx_ring->rx_buf_len = fq.buf_len; + /* XDP RX-queue info */ + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index, 0, fq.buf_len); + if (ret) + goto fq_destroy; + + xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, fq.pp); + + return 0; +fq_destroy: + ixgbevf_rx_fq_destroy(rx_ring); + return ret; +} + +/** + * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors) + * @adapter: board private structure + * @rx_ring: Rx descriptor ring (for a specific queue) to setup + * + * Returns 0 on success, negative on failure + **/ +int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring) +{ + int ret; + + ret = ixgbevf_setup_fq(adapter, rx_ring); + if (ret) + return ret; + u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ rx_ring->dma_size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); rx_ring->dma_size = 
ALIGN(rx_ring->dma_size, 4096); - rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->dma_size, + rx_ring->desc = dma_alloc_coherent(&adapter->pdev->dev, + rx_ring->dma_size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) goto err; - /* XDP RX-queue info */ - ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, 0, fq.buf_len); - if (ret) - goto err; - - xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, fq.pp); - rx_ring->xdp_prog = adapter->xdp_prog; return 0; err: - libeth_rx_fq_destroy(&fq); - rx_ring->rx_fqes = NULL; - rx_ring->pp = NULL; + ixgbevf_rx_fq_destroy(rx_ring); dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n"); return ret; } @@ -2951,24 +3047,15 @@ static int ixgbevf_setup_all_rx_resources(struct ixgbevf_adapter *adapter) **/ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) { - struct libeth_fq fq = { - .fqes = rx_ring->rx_fqes, - .pp = rx_ring->pp, - }; - ixgbevf_clean_rx_ring(rx_ring); rx_ring->xdp_prog = NULL; - xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - dma_free_coherent(fq.pp->p.dev, rx_ring->dma_size, rx_ring->desc, + dma_free_coherent(rx_ring->pp->p.dev, rx_ring->dma_size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; - libeth_rx_fq_destroy(&fq); - rx_ring->rx_fqes = NULL; - rx_ring->pp = NULL; + ixgbevf_rx_fq_destroy(rx_ring); } /** @@ -3871,6 +3958,9 @@ static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return ixgbevf_xdp_setup(dev, xdp->prog); + case XDP_SETUP_XSK_POOL: + return ixgbevf_setup_xsk_pool(netdev_priv(dev), xdp->xsk.pool, + xdp->xsk.queue_id); default: return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c new file mode 100644 index 00000000000000..75128f17fbd063 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2025 Intel Corporation */ + +#include + +#include "ixgbevf.h" +#include "ixgbevf_xsk.h" + +/** + * ixgbevf_single_irq_disable - Mask off interrupt generation on a single vector + * @adapter: board private structure + * @vidx: vector id + **/ +static void ixgbevf_single_irq_disable(struct ixgbevf_adapter *adapter, + u16 vidx) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, + adapter->eims_enable_mask & ~BIT(vidx)); + IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, BIT(vidx)); + IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, + adapter->eims_enable_mask & ~BIT(vidx)); + + IXGBE_WRITE_FLUSH(hw); + + synchronize_irq(adapter->msix_entries[vidx].vector); +} + +static void ixgbevf_qp_dis(struct ixgbevf_adapter *adapter, u16 qid) +{ + struct ixgbevf_ring *rx_ring = adapter->rx_ring[qid]; + struct ixgbevf_q_vector *q_vector = rx_ring->q_vector; + struct ixgbevf_ring *ring; + + netif_stop_subqueue(adapter->netdev, qid); + ixgbevf_single_irq_disable(adapter, q_vector->v_idx); + napi_disable(&q_vector->napi); + + ixgbevf_disable_rx_queue(adapter, adapter->rx_ring[qid]); + ixgbevf_clean_rx_ring(rx_ring); + ixgbevf_rx_fq_destroy(rx_ring); + + /* Clean both XDP and normal Tx queue */ + ixgbevf_for_each_ring(ring, q_vector->tx) { + ixgbevf_flush_tx_queue(ring); + ixgbevf_clean_tx_ring(ring); + } +} + +static void ixgbevf_qp_ena(struct ixgbevf_adapter *adapter, u16 qid) +{ + struct ixgbevf_ring *rx_ring = adapter->rx_ring[qid]; + struct ixgbevf_q_vector *q_vector = rx_ring->q_vector; + struct 
ixgbevf_ring *ring; + + ixgbevf_setup_fq(adapter, rx_ring); + ixgbevf_configure_rx_ring(adapter, rx_ring); + ixgbevf_for_each_ring(ring, q_vector->tx) + ixgbevf_configure_tx_ring(adapter, ring); + + napi_enable(&q_vector->napi); + ixgbevf_irq_enable(adapter); + netif_start_subqueue(adapter->netdev, qid); +} + +int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid) +{ + bool running = !test_bit(__IXGBEVF_DOWN, &adapter->state) && + adapter->xdp_prog; + int err; + + if (running) + ixgbevf_qp_dis(adapter, qid); + + err = libeth_xsk_setup_pool(adapter->netdev, qid, !!pool); + + if (running) + ixgbevf_qp_ena(adapter, qid); + + return err; +} + +static void ixgbevf_fill_rx_xsk_desc(const struct libeth_xskfq_fp *fq, u32 i) +{ + union ixgbe_adv_rx_desc *rx_desc = + &((union ixgbe_adv_rx_desc *)fq->descs)[i]; + + rx_desc->read.pkt_addr = + cpu_to_le64(libeth_xsk_buff_xdp_get_dma(fq->fqes[i])); + rx_desc->wb.upper.length = 0; +} + +void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num) +{ + struct libeth_xskfq_fp fq = { + .count = rx_ring->count, + .descs = rx_ring->desc, + .fqes = rx_ring->xsk_fqes, + .ntu = rx_ring->next_to_use, + .pool = rx_ring->xsk_pool, + }; + u32 done; + + done = libeth_xskfqe_alloc(&fq, num, ixgbevf_fill_rx_xsk_desc); + if (likely(done)) { + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(rx_ring, fq.ntu); + } + + rx_ring->next_to_use = fq.ntu; + rx_ring->pending -= done; +} + +void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring) +{ + u32 ntc = rx_ring->next_to_clean; + + if (rx_ring->xsk_xdp_head) + xsk_buff_free(&rx_ring->xsk_xdp_head->base); + + while (ntc != rx_ring->next_to_use) { + xsk_buff_free(&rx_ring->xsk_fqes[ntc]->base); + ntc++; + ntc = ntc == rx_ring->count ? 0 : ntc; + } +} diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h new file mode 100644 index 00000000000000..191e9035a9463f --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2025 Intel Corporation. */ + +#ifndef _IXGBEVF_XSK_H_ +#define _IXGBEVF_XSK_H_ + +int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid); +void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num); +void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring); + +#endif /* _IXGBEVF_XSK_H_ */ From 325a82bc77be00cd84d685e27c87ab98d457fa60 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 27 Oct 2025 14:21:11 +0100 Subject: [PATCH 13/15] ixgbevf: implement AF_XDP zero-copy Tx Add code that handles Tx ZC queues inside of napi_poll(), utilizing libeth helpers. As the NIC's multi-buffer conventions do not play nicely with AF_XDP's, leave handling of segments for later.
Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 20 ++++++++-- .../ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h | 6 +-- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 38 ++++++++++++++++++- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.h | 2 + 4 files changed, 58 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index c75fc732bcb8cf..210556f6c8efd0 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -488,6 +488,7 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, u16 to_clean) { + bool xsk_ring = ring_is_xsk(xdp_ring); struct libeth_xdpsq_napi_stats stats; u32 ntc = xdp_ring->next_to_clean; struct xdp_frame_bulk cbulk; @@ -497,11 +498,14 @@ void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, .xss = &stats, .napi = in_napi, }; + u32 xsk_frames = 0; xdp_frame_bulk_init(&cbulk); xdp_ring->pending -= to_clean; while (likely(to_clean--)) { + xsk_frames += xsk_ring && + likely(!xdp_ring->xdp_sqes[ntc].type) ? 1 : 0; libeth_xdp_complete_tx(&xdp_ring->xdp_sqes[ntc], &cp); ntc++; ntc = unlikely(ntc == xdp_ring->count) ? 0 : ntc; @@ -509,6 +513,8 @@ void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, xdp_ring->next_to_clean = ntc; xdp_flush_frame_bulk(&cbulk); + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); } static void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) @@ -699,10 +705,12 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) bool clean_complete = true; ixgbevf_for_each_ring(ring, q_vector->tx) { - if (ring_is_xdp(ring)) - continue; - if (!ixgbevf_clean_tx_irq(q_vector, ring, budget)) - clean_complete = false; + if (ring_is_xsk(ring)) + clean_complete = ixgbevf_clean_xsk_tx_irq(q_vector, + ring, budget); + else if (!ring_is_xdp(ring)) + clean_complete = ixgbevf_clean_tx_irq(q_vector, + ring, budget); } if (budget <= 0) @@ -1171,6 +1179,10 @@ void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, else clear_ring_xsk(ring); + ring->thresh = ring_is_xsk(ring) ? 
+ libeth_xdp_queue_threshold(ring->count) : + XDP_BULK_QUEUE_SIZE; + clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h index d8a8eb189ac67c..acbf74d9ae5377 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h @@ -229,8 +229,8 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) struct ixgbevf_ring *xdp_ring = xdpsq; libeth_xdpsq_lock(&xdp_ring->xdpq_lock); - if (unlikely(ixgbevf_desc_unused(xdp_ring) < LIBETH_XDP_TX_BULK)) { - u16 to_clean = ixgbevf_tx_get_num_sent(xdp_ring); + if (unlikely(ixgbevf_desc_unused(xdp_ring) < xdp_ring->thresh)) { + u16 to_clean = ixgbevf_tx_get_num_sent(xdpsq); if (likely(to_clean)) ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); @@ -266,7 +266,7 @@ static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) .lock = &xdp_ring->xdpq_lock, .ntu = &xdp_ring->next_to_use, .pending = &xdp_ring->pending, - .pool = NULL, + .pool = xdp_ring->xsk_pool, .sqes = xdp_ring->xdp_sqes, }; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c index 75128f17fbd063..720d53ede23c2e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -3,7 +3,7 @@ #include -#include "ixgbevf.h" +#include "ixgbevf_txrx_lib.h" #include "ixgbevf_xsk.h" /** @@ -127,3 +127,39 @@ void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring) ntc = ntc == rx_ring->count ? 0 : ntc; } } + +static void ixgbevf_xsk_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + union ixgbe_adv_tx_desc *tx_desc = + &((union ixgbe_adv_tx_desc *)sq->descs)[i]; + + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS | + IXGBE_TXD_CMD_EOP | + desc.len; + + tx_desc->read.olinfo_status = + cpu_to_le32((desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + + tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XSK_DEFINE_FLUSH_TX(static ixgbevf_xsk_flush_tx, ixgbevf_prep_xdp_sq, + ixgbevf_xsk_xmit_desc); +LIBETH_XDP_DEFINE_END(); + +bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *tx_ring, int napi_budget) +{ + u32 budget = min_t(u32, napi_budget, tx_ring->thresh); + + return libeth_xsk_xmit_do_bulk(tx_ring->xsk_pool, tx_ring, budget, + NULL, ixgbevf_prep_xdp_sq, + ixgbevf_xsk_xmit_desc, + ixgbevf_xdp_rs_and_bump); +} diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h index 191e9035a9463f..0ec81d82b5fb8b 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -8,5 +8,7 @@ int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, struct xsk_buff_pool *pool, u16 qid); void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num); void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring); +bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *tx_ring, int napi_budget); #endif /* _IXGBEVF_XSK_H_ */ From d2d0e737a5821b65a6d3d93321c94634728cffcf Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 27 Oct 2025 
14:24:57 +0100 Subject: [PATCH 14/15] ixgbevf: implement AF_XDP zero-copy Rx Add code that handles AF_XDP ZC Rx queues inside of napi_poll(), utilize libeth helpers. Signed-off-by: Larysa Zaremba --- .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 +- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 91 +++++++++++++++++++ .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.h | 2 + 3 files changed, 96 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 210556f6c8efd0..275b4554e39552 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -725,7 +725,9 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) per_ring_budget = budget; ixgbevf_for_each_ring(ring, q_vector->rx) { - int cleaned = ixgbevf_clean_rx_irq(q_vector, ring, + int cleaned = ring_is_xsk(ring) ? + ixgbevf_clean_xsk_rx_irq(q_vector, ring, budget) : + ixgbevf_clean_rx_irq(q_vector, ring, per_ring_budget); work_done += cleaned; if (cleaned >= per_ring_budget) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c index 720d53ede23c2e..1aa4e2f9b8d976 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -151,8 +151,99 @@ static void ixgbevf_xsk_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, LIBETH_XDP_DEFINE_START(); LIBETH_XSK_DEFINE_FLUSH_TX(static ixgbevf_xsk_flush_tx, ixgbevf_prep_xdp_sq, ixgbevf_xsk_xmit_desc); +LIBETH_XSK_DEFINE_RUN_PROG(static ixgbevf_xsk_run_prog, ixgbevf_xsk_flush_tx); +LIBETH_XSK_DEFINE_FINALIZE(static ixgbevf_xsk_finalize_xdp_napi, + ixgbevf_xsk_flush_tx, ixgbevf_xdp_rs_and_bump); LIBETH_XDP_DEFINE_END(); +u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *rx_ring, int budget) +{ + struct ixgbevf_adapter *adapter = q_vector->adapter; + u32 total_rx_bytes = 0, total_rx_packets = 0; + LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); + struct libeth_xdp_buff *head_xdp; + struct sk_buff *skb; + + head_xdp = rx_ring->xsk_xdp_head; + libeth_xsk_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); + + while (likely(total_rx_packets < budget)) { + union ixgbe_adv_rx_desc *rx_desc; + struct libeth_xdp_buff *rx_buffer; + unsigned int size; + + if (unlikely(rx_ring->pending >= rx_ring->thresh)) + ixgbevf_xsk_alloc_rx_bufs(rx_ring, rx_ring->thresh); + + rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); + size = le16_to_cpu(rx_desc->wb.upper.length); + if (unlikely(!size)) + break; + + /* Avoid reading other descriptor fields before checking size */ + rmb(); + + rx_buffer = rx_ring->xsk_fqes[rx_ring->next_to_clean]; + head_xdp = libeth_xsk_process_buff(head_xdp, rx_buffer, size); + if (unlikely(!head_xdp) || ixgbevf_is_non_eop(rx_ring, rx_desc)) + continue; + + if (ixgbevf_xsk_run_prog(head_xdp, &xdp_tx_bulk)) { + head_xdp = NULL; + total_rx_packets++; + total_rx_bytes += size; + continue; + } + + skb = xdp_build_skb_from_zc(&head_xdp->base); + head_xdp = NULL; + + if (unlikely(!skb)) { + libeth_xdp_return_buff_slow(head_xdp); + rx_ring->rx_stats.alloc_rx_buff_failed++; + break; + } + + if (unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { + skb = NULL; + continue; + } + + if (unlikely((skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + ether_addr_equal(rx_ring->netdev->dev_addr, + 
eth_hdr(skb)->h_source))) { + dev_kfree_skb_irq(skb); + continue; + } + + /* populate checksum, VLAN, and protocol */ + ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); + + total_rx_bytes += skb->len; + total_rx_packets++; + + napi_gro_receive(&q_vector->napi, skb); + } + + /* place incomplete frames back on ring for completion */ + rx_ring->xsk_xdp_head = head_xdp; + + ixgbevf_xsk_finalize_xdp_napi(&xdp_tx_bulk); + + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->stats.packets += total_rx_packets; + rx_ring->stats.bytes += total_rx_bytes; + u64_stats_update_end(&rx_ring->syncp); + q_vector->rx.total_packets += total_rx_packets; + q_vector->rx.total_bytes += total_rx_bytes; + + return total_rx_packets; +} + bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *tx_ring, int napi_budget) { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h index 0ec81d82b5fb8b..4c3131f1e2039e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -8,6 +8,8 @@ int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, struct xsk_buff_pool *pool, u16 qid); void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num); void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring); +u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *rx_ring, int budget); bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *tx_ring, int napi_budget); From fc4df3cc32a74cbf4afd6a2a48ee35be36483505 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Mon, 27 Oct 2025 14:26:46 +0100 Subject: [PATCH 15/15] ixgbevf: implement .ndo_xsk_wakeup() and set features To finalize basic AF_XDP implementation, set features and add .ndo_xsk_wakeup() handler. TMP NOTE: IPI variant is incomplete, works through interrupts. 
Signed-off-by: Larysa Zaremba --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 1 + .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 5 ++-- .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.c | 25 +++++++++++++++++++ .../net/ethernet/intel/ixgbevf/ixgbevf_xsk.h | 1 + 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 44e6b2537812fa..d4d5d622082987 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -224,6 +224,7 @@ struct ixgbevf_q_vector { IXGBEVF_QV_STATE_POLL_YIELD) spinlock_t lock; #endif /* CONFIG_NET_RX_BUSY_POLL */ + call_single_data_t xsk_csd; /* trigger xsk-related napi */ }; /* microsecond values for various ITR rates shifted by 2 to fit itr register diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 275b4554e39552..2123f848140d39 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2139,6 +2139,7 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, /* initialize NAPI */ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll); + libeth_xsk_init_wakeup(&q_vector->xsk_csd, &q_vector->napi); /* tie q_vector and adapter together */ adapter->q_vector[v_idx] = q_vector; @@ -3995,6 +3996,7 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_features_check = ixgbevf_features_check, .ndo_bpf = ixgbevf_xdp, .ndo_xdp_xmit = ixgbevf_xdp_xmit, + .ndo_xsk_wakeup = ixgbevf_xsk_wakeup, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev) @@ -4127,8 +4129,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_RX_SG | - NETDEV_XDP_ACT_REDIRECT; + libeth_xdp_set_features_noredir(netdev, NULL, 1, NULL); /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c index 1aa4e2f9b8d976..666dcb78727db1 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -254,3 +254,28 @@ bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, ixgbevf_xsk_xmit_desc, ixgbevf_xdp_rs_and_bump); } + +int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags) +{ + struct ixgbevf_adapter *adapter = netdev_priv(dev); + struct ixgbevf_q_vector *q_vector; + struct ixgbevf_ring *rx_ring; + + if (unlikely(test_bit(__IXGBEVF_DOWN, &adapter->state))) + return -ENETDOWN; + + if (unlikely(queue_id >= adapter->num_xdp_queues)) + return -EINVAL; + + rx_ring = adapter->rx_ring[queue_id]; + if (unlikely(!ring_is_xsk(rx_ring))) + return -EINVAL; + + q_vector = rx_ring->q_vector; + if (!napi_if_scheduled_mark_missed(&q_vector->napi)) + IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS, + BIT(q_vector->v_idx)); + //libeth_xsk_wakeup(&rx_ring->xsk_csd, queue_id); + + return 0; +} diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h index 4c3131f1e2039e..59a7323fda0846 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h @@ -12,5 +12,6 @@ u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, struct 
ixgbevf_ring *rx_ring, int budget);
 bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector,
			       struct ixgbevf_ring *tx_ring, int napi_budget);
+int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
 
 #endif /* _IXGBEVF_XSK_H_ */
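
For reference, the zero-copy path wired up by this series is driven from user space through the regular AF_XDP socket API; nothing beyond the interface name and queue index is ixgbevf-specific. Below is a minimal consumer sketch, assuming libxdp's <xdp/xsk.h> helpers; the interface name "ens1f0v0", queue 0 and the frame/ring counts are illustrative placeholders, not something defined by the patches above.

/*
 * Minimal AF_XDP zero-copy consumer sketch (not part of this series).
 * Assumptions: libxdp xsk helpers, VF interface "ens1f0v0", queue 0.
 * Error handling and the actual Rx/Tx processing loop are omitted.
 */
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <xdp/xsk.h>

#define NUM_FRAMES	4096
#define FRAME_SIZE	XSK_UMEM__DEFAULT_FRAME_SIZE

int main(void)
{
	struct xsk_socket_config cfg = {
		.rx_size	= XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.tx_size	= XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.xdp_flags	= XDP_FLAGS_DRV_MODE,
		/* force the zero-copy path instead of falling back to copy mode */
		.bind_flags	= XDP_ZEROCOPY | XDP_USE_NEED_WAKEUP,
	};
	struct xsk_ring_prod fill, tx;
	struct xsk_ring_cons comp, rx;
	struct xsk_socket *xsk;
	struct xsk_umem *umem;
	unsigned int i, n;
	__u32 idx = 0;
	void *bufs;

	/* UMEM backing store shared between user space and the driver */
	if (posix_memalign(&bufs, getpagesize(), (size_t)NUM_FRAMES * FRAME_SIZE))
		return 1;
	if (xsk_umem__create(&umem, bufs, (size_t)NUM_FRAMES * FRAME_SIZE,
			     &fill, &comp, NULL))
		return 1;

	/* bind to Rx/Tx queue 0 of the VF; this reaches XDP_SETUP_XSK_POOL */
	if (xsk_socket__create(&xsk, "ens1f0v0", 0, umem, &rx, &tx, &cfg))
		return 1;

	/* hand frames to the fill ring so the driver can post Rx buffers */
	n = xsk_ring_prod__reserve(&fill, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
	for (i = 0; i < n; i++)
		*xsk_ring_prod__fill_addr(&fill, idx + i) = (__u64)i * FRAME_SIZE;
	xsk_ring_prod__submit(&fill, n);

	/* with need_wakeup set, this kick lands in the driver's ndo_xsk_wakeup */
	if (xsk_ring_prod__needs_wakeup(&fill))
		recvfrom(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);

	pause();	/* a real application polls the rx/tx/comp rings here */

	xsk_socket__delete(xsk);
	xsk_umem__delete(umem);
	free(bufs);
	return 0;
}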